duckdb 0.5.1-dev29.0 → 0.5.1-dev291.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +819 -277
- package/src/duckdb.hpp +148 -11
- package/src/parquet-amalgamation.cpp +35963 -35937
package/src/duckdb.cpp
CHANGED
|
@@ -620,7 +620,88 @@ public:
|
|
|
620
620
|
|
|
621
621
|
} // namespace duckdb
|
|
622
622
|
|
|
623
|
+
//===----------------------------------------------------------------------===//
|
|
624
|
+
// DuckDB
|
|
625
|
+
//
|
|
626
|
+
// extension_functions.hpp
|
|
627
|
+
//
|
|
628
|
+
//
|
|
629
|
+
//===----------------------------------------------------------------------===//
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
namespace duckdb {
|
|
623
636
|
|
|
637
|
+
struct ExtensionFunction {
|
|
638
|
+
char function[48];
|
|
639
|
+
char extension[48];
|
|
640
|
+
};
|
|
641
|
+
|
|
642
|
+
static constexpr ExtensionFunction EXTENSION_FUNCTIONS[] = {
|
|
643
|
+
{"->>", "json"},
|
|
644
|
+
{"array_to_json", "json"},
|
|
645
|
+
{"create_fts_index", "fts"},
|
|
646
|
+
{"dbgen", "tpch"},
|
|
647
|
+
{"drop_fts_index", "fts"},
|
|
648
|
+
{"dsdgen", "tpcds"},
|
|
649
|
+
{"excel_text", "excel"},
|
|
650
|
+
{"from_json", "json"},
|
|
651
|
+
{"from_json_strict", "json"},
|
|
652
|
+
{"from_substrait", "substrait"},
|
|
653
|
+
{"get_substrait", "substrait"},
|
|
654
|
+
{"get_substrait_json", "substrait"},
|
|
655
|
+
{"icu_calendar_names", "icu"},
|
|
656
|
+
{"icu_sort_key", "icu"},
|
|
657
|
+
{"json", "json"},
|
|
658
|
+
{"json_array", "json"},
|
|
659
|
+
{"json_array_length", "json"},
|
|
660
|
+
{"json_extract", "json"},
|
|
661
|
+
{"json_extract_path", "json"},
|
|
662
|
+
{"json_extract_path_text", "json"},
|
|
663
|
+
{"json_extract_string", "json"},
|
|
664
|
+
{"json_group_array", "json"},
|
|
665
|
+
{"json_group_object", "json"},
|
|
666
|
+
{"json_group_structure", "json"},
|
|
667
|
+
{"json_merge_patch", "json"},
|
|
668
|
+
{"json_object", "json"},
|
|
669
|
+
{"json_quote", "json"},
|
|
670
|
+
{"json_structure", "json"},
|
|
671
|
+
{"json_transform", "json"},
|
|
672
|
+
{"json_transform_strict", "json"},
|
|
673
|
+
{"json_type", "json"},
|
|
674
|
+
{"json_valid", "json"},
|
|
675
|
+
{"make_timestamptz", "icu"},
|
|
676
|
+
{"parquet_metadata", "parquet"},
|
|
677
|
+
{"parquet_scan", "parquet"},
|
|
678
|
+
{"parquet_schema", "parquet"},
|
|
679
|
+
{"pg_timezone_names", "icu"},
|
|
680
|
+
{"postgres_attach", "postgres_scanner"},
|
|
681
|
+
{"postgres_scan", "postgres_scanner"},
|
|
682
|
+
{"postgres_scan_pushdown", "postgres_scanner"},
|
|
683
|
+
{"read_json_objects", "json"},
|
|
684
|
+
{"read_ndjson_objects", "json"},
|
|
685
|
+
{"read_parquet", "parquet"},
|
|
686
|
+
{"row_to_json", "json"},
|
|
687
|
+
{"sqlite_attach", "sqlite_scanner"},
|
|
688
|
+
{"sqlite_scan", "sqlite_scanner"},
|
|
689
|
+
{"stem", "fts"},
|
|
690
|
+
{"text", "excel"},
|
|
691
|
+
{"to_json", "json"},
|
|
692
|
+
{"tpcds", "tpcds"},
|
|
693
|
+
{"tpcds_answers", "tpcds"},
|
|
694
|
+
{"tpcds_queries", "tpcds"},
|
|
695
|
+
{"tpch", "tpch"},
|
|
696
|
+
{"tpch_answers", "tpch"},
|
|
697
|
+
{"tpch_queries", "tpch"},
|
|
698
|
+
{"visualize_diff_profiling_output", "visualizer"},
|
|
699
|
+
{"visualize_json_profiling_output", "visualizer"},
|
|
700
|
+
{"visualize_last_profiling_output", "visualizer"},
|
|
701
|
+
};
|
|
702
|
+
} // namespace duckdb
|
|
703
|
+
|
|
704
|
+
#include <algorithm>
|
|
624
705
|
namespace duckdb {
|
|
625
706
|
|
|
626
707
|
string SimilarCatalogEntry::GetQualifiedName() const {
|
|
@@ -823,6 +904,16 @@ SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const
|
|
|
823
904
|
return {most_similar.first, most_similar.second, schema_of_most_similar};
|
|
824
905
|
}
|
|
825
906
|
|
|
907
|
+
string FindExtension(const string &function_name) {
|
|
908
|
+
auto size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionFunction);
|
|
909
|
+
auto it = std::lower_bound(
|
|
910
|
+
EXTENSION_FUNCTIONS, EXTENSION_FUNCTIONS + size, function_name,
|
|
911
|
+
[](const ExtensionFunction &element, const string &value) { return element.function < value; });
|
|
912
|
+
if (it != EXTENSION_FUNCTIONS + size && it->function == function_name) {
|
|
913
|
+
return it->extension;
|
|
914
|
+
}
|
|
915
|
+
return "";
|
|
916
|
+
}
|
|
826
917
|
CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name,
|
|
827
918
|
CatalogType type, const vector<SchemaCatalogEntry *> &schemas,
|
|
828
919
|
QueryErrorContext error_context) {
|
|
@@ -836,7 +927,12 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
|
|
|
836
927
|
}
|
|
837
928
|
});
|
|
838
929
|
auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
|
|
839
|
-
|
|
930
|
+
auto extension_name = FindExtension(entry_name);
|
|
931
|
+
if (!extension_name.empty()) {
|
|
932
|
+
return CatalogException("Function with name %s is not on the catalog, but it exists in the %s extension. To "
|
|
933
|
+
"Install and Load the extension, run: INSTALL %s; LOAD %s;",
|
|
934
|
+
entry_name, extension_name, extension_name, extension_name);
|
|
935
|
+
}
|
|
840
936
|
string did_you_mean;
|
|
841
937
|
if (unseen_entry.Found() && unseen_entry.distance < entry.distance) {
|
|
842
938
|
did_you_mean = "\nDid you mean \"" + unseen_entry.GetQualifiedName() + "\"?";
|
|
@@ -3667,6 +3763,19 @@ idx_t TableCatalogEntry::StandardColumnCount() const {
|
|
|
3667
3763
|
return count;
|
|
3668
3764
|
}
|
|
3669
3765
|
|
|
3766
|
+
unique_ptr<BaseStatistics> TableCatalogEntry::GetStatistics(ClientContext &context, column_t column_id) {
|
|
3767
|
+
if (column_id == COLUMN_IDENTIFIER_ROW_ID) {
|
|
3768
|
+
return nullptr;
|
|
3769
|
+
}
|
|
3770
|
+
if (column_id >= columns.size()) {
|
|
3771
|
+
throw InternalException("TableCatalogEntry::GetStatistics column_id out of range");
|
|
3772
|
+
}
|
|
3773
|
+
if (columns[column_id].Generated()) {
|
|
3774
|
+
return nullptr;
|
|
3775
|
+
}
|
|
3776
|
+
return storage->GetStatistics(context, columns[column_id].StorageOid());
|
|
3777
|
+
}
|
|
3778
|
+
|
|
3670
3779
|
unique_ptr<CatalogEntry> TableCatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) {
|
|
3671
3780
|
D_ASSERT(!internal);
|
|
3672
3781
|
if (info->type != AlterType::ALTER_TABLE) {
|
|
@@ -3734,6 +3843,9 @@ static void RenameExpression(ParsedExpression &expr, RenameColumnInfo &info) {
|
|
|
3734
3843
|
|
|
3735
3844
|
unique_ptr<CatalogEntry> TableCatalogEntry::RenameColumn(ClientContext &context, RenameColumnInfo &info) {
|
|
3736
3845
|
auto rename_idx = GetColumnIndex(info.old_name);
|
|
3846
|
+
if (rename_idx == COLUMN_IDENTIFIER_ROW_ID) {
|
|
3847
|
+
throw CatalogException("Cannot rename rowid column");
|
|
3848
|
+
}
|
|
3737
3849
|
auto create_info = make_unique<CreateTableInfo>(schema->name, name);
|
|
3738
3850
|
create_info->temporary = temporary;
|
|
3739
3851
|
for (idx_t i = 0; i < columns.size(); i++) {
|
|
@@ -3836,6 +3948,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::AddColumn(ClientContext &context, Ad
|
|
|
3836
3948
|
unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context, RemoveColumnInfo &info) {
|
|
3837
3949
|
auto removed_index = GetColumnIndex(info.removed_column, info.if_column_exists);
|
|
3838
3950
|
if (removed_index == DConstants::INVALID_INDEX) {
|
|
3951
|
+
if (!info.if_column_exists) {
|
|
3952
|
+
throw CatalogException("Cannot drop column: rowid column cannot be dropped");
|
|
3953
|
+
}
|
|
3839
3954
|
return nullptr;
|
|
3840
3955
|
}
|
|
3841
3956
|
|
|
@@ -3942,7 +4057,7 @@ unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context,
|
|
|
3942
4057
|
return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
|
|
3943
4058
|
storage);
|
|
3944
4059
|
}
|
|
3945
|
-
auto new_storage = make_shared<DataTable>(context, *storage, removed_index);
|
|
4060
|
+
auto new_storage = make_shared<DataTable>(context, *storage, columns[removed_index].StorageOid());
|
|
3946
4061
|
return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
|
|
3947
4062
|
new_storage);
|
|
3948
4063
|
}
|
|
@@ -3950,13 +4065,18 @@ unique_ptr<CatalogEntry> TableCatalogEntry::RemoveColumn(ClientContext &context,
|
|
|
3950
4065
|
unique_ptr<CatalogEntry> TableCatalogEntry::SetDefault(ClientContext &context, SetDefaultInfo &info) {
|
|
3951
4066
|
auto create_info = make_unique<CreateTableInfo>(schema->name, name);
|
|
3952
4067
|
auto default_idx = GetColumnIndex(info.column_name);
|
|
4068
|
+
if (default_idx == COLUMN_IDENTIFIER_ROW_ID) {
|
|
4069
|
+
throw CatalogException("Cannot SET DEFAULT for rowid column");
|
|
4070
|
+
}
|
|
3953
4071
|
|
|
3954
4072
|
// Copy all the columns, changing the value of the one that was specified by 'column_name'
|
|
3955
4073
|
for (idx_t i = 0; i < columns.size(); i++) {
|
|
3956
4074
|
auto copy = columns[i].Copy();
|
|
3957
4075
|
if (default_idx == i) {
|
|
3958
4076
|
// set the default value of this column
|
|
3959
|
-
|
|
4077
|
+
if (copy.Generated()) {
|
|
4078
|
+
throw BinderException("Cannot SET DEFAULT for generated column \"%s\"", columns[i].Name());
|
|
4079
|
+
}
|
|
3960
4080
|
copy.SetDefaultValue(info.expression ? info.expression->Copy() : nullptr);
|
|
3961
4081
|
}
|
|
3962
4082
|
create_info->columns.push_back(move(copy));
|
|
@@ -3981,6 +4101,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
|
|
|
3981
4101
|
}
|
|
3982
4102
|
|
|
3983
4103
|
idx_t not_null_idx = GetColumnIndex(info.column_name);
|
|
4104
|
+
if (columns[not_null_idx].Generated()) {
|
|
4105
|
+
throw BinderException("Unsupported constraint for generated column!");
|
|
4106
|
+
}
|
|
3984
4107
|
bool has_not_null = false;
|
|
3985
4108
|
for (idx_t i = 0; i < constraints.size(); i++) {
|
|
3986
4109
|
auto constraint = constraints[i]->Copy();
|
|
@@ -4004,8 +4127,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetNotNull(ClientContext &context, S
|
|
|
4004
4127
|
storage);
|
|
4005
4128
|
}
|
|
4006
4129
|
|
|
4007
|
-
// Return with new storage info
|
|
4008
|
-
auto new_storage = make_shared<DataTable>(context, *storage,
|
|
4130
|
+
// Return with new storage info. Note that we need the bound column index here.
|
|
4131
|
+
auto new_storage = make_shared<DataTable>(context, *storage,
|
|
4132
|
+
make_unique<BoundNotNullConstraint>(columns[not_null_idx].StorageOid()));
|
|
4009
4133
|
return make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(),
|
|
4010
4134
|
new_storage);
|
|
4011
4135
|
}
|
|
@@ -4111,12 +4235,19 @@ unique_ptr<CatalogEntry> TableCatalogEntry::ChangeColumnType(ClientContext &cont
|
|
|
4111
4235
|
auto expression = info.expression->Copy();
|
|
4112
4236
|
auto bound_expression = expr_binder.Bind(expression);
|
|
4113
4237
|
auto bound_create_info = binder->BindCreateTableInfo(move(create_info));
|
|
4238
|
+
vector<column_t> storage_oids;
|
|
4114
4239
|
if (bound_columns.empty()) {
|
|
4115
|
-
|
|
4240
|
+
storage_oids.push_back(COLUMN_IDENTIFIER_ROW_ID);
|
|
4241
|
+
}
|
|
4242
|
+
// transform to storage_oid
|
|
4243
|
+
else {
|
|
4244
|
+
for (idx_t i = 0; i < bound_columns.size(); i++) {
|
|
4245
|
+
storage_oids.push_back(columns[bound_columns[i]].StorageOid());
|
|
4246
|
+
}
|
|
4116
4247
|
}
|
|
4117
4248
|
|
|
4118
|
-
auto new_storage =
|
|
4119
|
-
|
|
4249
|
+
auto new_storage = make_shared<DataTable>(context, *storage, columns[change_idx].StorageOid(), info.target_type,
|
|
4250
|
+
move(storage_oids), *bound_expression);
|
|
4120
4251
|
auto result =
|
|
4121
4252
|
make_unique<TableCatalogEntry>(catalog, schema, (BoundCreateTableInfo *)bound_create_info.get(), new_storage);
|
|
4122
4253
|
return move(result);
|
|
@@ -4364,7 +4495,7 @@ void TableCatalogEntry::CommitAlter(AlterInfo &info) {
|
|
|
4364
4495
|
}
|
|
4365
4496
|
}
|
|
4366
4497
|
D_ASSERT(removed_index != DConstants::INVALID_INDEX);
|
|
4367
|
-
storage->CommitDropColumn(removed_index);
|
|
4498
|
+
storage->CommitDropColumn(columns[removed_index].StorageOid());
|
|
4368
4499
|
}
|
|
4369
4500
|
|
|
4370
4501
|
void TableCatalogEntry::CommitDrop() {
|
|
@@ -4934,11 +5065,13 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
|
|
|
4934
5065
|
throw CatalogException(rename_err_msg, original_name, value->name);
|
|
4935
5066
|
}
|
|
4936
5067
|
}
|
|
5068
|
+
}
|
|
5069
|
+
|
|
5070
|
+
if (value->name != original_name) {
|
|
5071
|
+
// Do PutMapping and DeleteMapping after dependency check
|
|
4937
5072
|
PutMapping(context, value->name, entry_index);
|
|
4938
5073
|
DeleteMapping(context, original_name);
|
|
4939
5074
|
}
|
|
4940
|
-
//! Check the dependency manager to verify that there are no conflicting dependencies with this alter
|
|
4941
|
-
catalog.dependency_manager->AlterObject(context, entry, value.get());
|
|
4942
5075
|
|
|
4943
5076
|
value->timestamp = transaction.transaction_id;
|
|
4944
5077
|
value->child = move(entries[entry_index]);
|
|
@@ -4950,10 +5083,18 @@ bool CatalogSet::AlterEntry(ClientContext &context, const string &name, AlterInf
|
|
|
4950
5083
|
alter_info->Serialize(serializer);
|
|
4951
5084
|
BinaryData serialized_alter = serializer.GetData();
|
|
4952
5085
|
|
|
5086
|
+
auto new_entry = value.get();
|
|
5087
|
+
|
|
4953
5088
|
// push the old entry in the undo buffer for this transaction
|
|
4954
5089
|
transaction.PushCatalogEntry(value->child.get(), serialized_alter.data.get(), serialized_alter.size);
|
|
4955
5090
|
entries[entry_index] = move(value);
|
|
4956
5091
|
|
|
5092
|
+
// Check the dependency manager to verify that there are no conflicting dependencies with this alter
|
|
5093
|
+
// Note that we do this AFTER the new entry has been entirely set up in the catalog set
|
|
5094
|
+
// that is because in case the alter fails because of a dependency conflict, we need to be able to cleanly roll back
|
|
5095
|
+
// to the old entry.
|
|
5096
|
+
catalog.dependency_manager->AlterObject(context, entry, new_entry);
|
|
5097
|
+
|
|
4957
5098
|
return true;
|
|
4958
5099
|
}
|
|
4959
5100
|
|
|
@@ -6506,7 +6647,7 @@ static void GetBitPosition(idx_t row_idx, idx_t &current_byte, uint8_t &current_
|
|
|
6506
6647
|
}
|
|
6507
6648
|
|
|
6508
6649
|
static void UnsetBit(uint8_t *data, idx_t current_byte, uint8_t current_bit) {
|
|
6509
|
-
data[current_byte] &= ~(1 << current_bit);
|
|
6650
|
+
data[current_byte] &= ~((uint64_t)1 << current_bit);
|
|
6510
6651
|
}
|
|
6511
6652
|
|
|
6512
6653
|
static void NextBit(idx_t &current_byte, uint8_t &current_bit) {
|
|
@@ -9450,6 +9591,8 @@ void Exception::ThrowAsTypeWithMessage(ExceptionType type, const string &message
|
|
|
9450
9591
|
throw ParameterNotAllowedException(message);
|
|
9451
9592
|
case ExceptionType::PARAMETER_NOT_RESOLVED:
|
|
9452
9593
|
throw ParameterNotResolvedException();
|
|
9594
|
+
case ExceptionType::FATAL:
|
|
9595
|
+
throw FatalException(message);
|
|
9453
9596
|
default:
|
|
9454
9597
|
throw Exception(type, message);
|
|
9455
9598
|
}
|
|
@@ -16715,9 +16858,15 @@ string FileSystem::ConvertSeparators(const string &path) {
|
|
|
16715
16858
|
}
|
|
16716
16859
|
|
|
16717
16860
|
string FileSystem::ExtractBaseName(const string &path) {
|
|
16861
|
+
if (path.empty()) {
|
|
16862
|
+
return string();
|
|
16863
|
+
}
|
|
16718
16864
|
auto normalized_path = ConvertSeparators(path);
|
|
16719
16865
|
auto sep = PathSeparator();
|
|
16720
|
-
auto
|
|
16866
|
+
auto splits = StringUtil::Split(normalized_path, sep);
|
|
16867
|
+
D_ASSERT(!splits.empty());
|
|
16868
|
+
auto vec = StringUtil::Split(splits.back(), ".");
|
|
16869
|
+
D_ASSERT(!vec.empty());
|
|
16721
16870
|
return vec[0];
|
|
16722
16871
|
}
|
|
16723
16872
|
|
|
@@ -18792,7 +18941,8 @@ namespace duckdb {
|
|
|
18792
18941
|
|
|
18793
18942
|
static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
|
|
18794
18943
|
unordered_map<string, column_t> &column_map,
|
|
18795
|
-
|
|
18944
|
+
duckdb_re2::RE2 &compiled_regex, bool filename_col,
|
|
18945
|
+
bool hive_partition_cols) {
|
|
18796
18946
|
unordered_map<column_t, string> result;
|
|
18797
18947
|
|
|
18798
18948
|
if (filename_col) {
|
|
@@ -18803,7 +18953,7 @@ static unordered_map<column_t, string> GetKnownColumnValues(string &filename,
|
|
|
18803
18953
|
}
|
|
18804
18954
|
|
|
18805
18955
|
if (hive_partition_cols) {
|
|
18806
|
-
auto partitions = HivePartitioning::Parse(filename);
|
|
18956
|
+
auto partitions = HivePartitioning::Parse(filename, compiled_regex);
|
|
18807
18957
|
for (auto &partition : partitions) {
|
|
18808
18958
|
auto lookup_column_id = column_map.find(partition.first);
|
|
18809
18959
|
if (lookup_column_id != column_map.end()) {
|
|
@@ -18841,10 +18991,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
|
|
|
18841
18991
|
// - s3://bucket/var1=value1/bla/bla/var2=value2
|
|
18842
18992
|
// - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
|
|
18843
18993
|
// - folder/folder/folder/../var1=value1/etc/.//var2=value2
|
|
18844
|
-
|
|
18845
|
-
std::map<string, string> result;
|
|
18994
|
+
const string HivePartitioning::REGEX_STRING = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
|
|
18846
18995
|
|
|
18847
|
-
|
|
18996
|
+
std::map<string, string> HivePartitioning::Parse(string &filename, duckdb_re2::RE2 &regex) {
|
|
18997
|
+
std::map<string, string> result;
|
|
18848
18998
|
duckdb_re2::StringPiece input(filename); // Wrap a StringPiece around it
|
|
18849
18999
|
|
|
18850
19000
|
string var;
|
|
@@ -18855,6 +19005,11 @@ std::map<string, string> HivePartitioning::Parse(string &filename) {
|
|
|
18855
19005
|
return result;
|
|
18856
19006
|
}
|
|
18857
19007
|
|
|
19008
|
+
std::map<string, string> HivePartitioning::Parse(string &filename) {
|
|
19009
|
+
duckdb_re2::RE2 regex(REGEX_STRING);
|
|
19010
|
+
return Parse(filename, regex);
|
|
19011
|
+
}
|
|
19012
|
+
|
|
18858
19013
|
// TODO: this can still be improved by removing the parts of filter expressions that are true for all remaining files.
|
|
18859
19014
|
// currently, only expressions that cannot be evaluated during pushdown are removed.
|
|
18860
19015
|
void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<unique_ptr<Expression>> &filters,
|
|
@@ -18862,6 +19017,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
|
|
|
18862
19017
|
bool hive_enabled, bool filename_enabled) {
|
|
18863
19018
|
vector<string> pruned_files;
|
|
18864
19019
|
vector<unique_ptr<Expression>> pruned_filters;
|
|
19020
|
+
duckdb_re2::RE2 regex(REGEX_STRING);
|
|
18865
19021
|
|
|
18866
19022
|
if ((!filename_enabled && !hive_enabled) || filters.empty()) {
|
|
18867
19023
|
return;
|
|
@@ -18870,7 +19026,7 @@ void HivePartitioning::ApplyFiltersToFileList(vector<string> &files, vector<uniq
|
|
|
18870
19026
|
for (idx_t i = 0; i < files.size(); i++) {
|
|
18871
19027
|
auto &file = files[i];
|
|
18872
19028
|
bool should_prune_file = false;
|
|
18873
|
-
auto known_values = GetKnownColumnValues(file, column_map, filename_enabled, hive_enabled);
|
|
19029
|
+
auto known_values = GetKnownColumnValues(file, column_map, regex, filename_enabled, hive_enabled);
|
|
18874
19030
|
|
|
18875
19031
|
FilterCombiner combiner;
|
|
18876
19032
|
for (auto &filter : filters) {
|
|
@@ -19102,6 +19258,8 @@ private:
|
|
|
19102
19258
|
//! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location
|
|
19103
19259
|
void SetFilePointer(FileHandle &handle, idx_t location);
|
|
19104
19260
|
idx_t GetFilePointer(FileHandle &handle);
|
|
19261
|
+
|
|
19262
|
+
vector<string> FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path);
|
|
19105
19263
|
};
|
|
19106
19264
|
|
|
19107
19265
|
} // namespace duckdb
|
|
@@ -19983,6 +20141,26 @@ static void GlobFiles(FileSystem &fs, const string &path, const string &glob, bo
|
|
|
19983
20141
|
});
|
|
19984
20142
|
}
|
|
19985
20143
|
|
|
20144
|
+
vector<string> LocalFileSystem::FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path) {
|
|
20145
|
+
vector<string> result;
|
|
20146
|
+
if (FileExists(path) || IsPipe(path)) {
|
|
20147
|
+
result.push_back(path);
|
|
20148
|
+
} else if (!absolute_path) {
|
|
20149
|
+
Value value;
|
|
20150
|
+
if (opener->TryGetCurrentSetting("file_search_path", value)) {
|
|
20151
|
+
auto search_paths_str = value.ToString();
|
|
20152
|
+
std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
|
|
20153
|
+
for (const auto &search_path : search_paths) {
|
|
20154
|
+
auto joined_path = JoinPath(search_path, path);
|
|
20155
|
+
if (FileExists(joined_path) || IsPipe(joined_path)) {
|
|
20156
|
+
result.push_back(joined_path);
|
|
20157
|
+
}
|
|
20158
|
+
}
|
|
20159
|
+
}
|
|
20160
|
+
}
|
|
20161
|
+
return result;
|
|
20162
|
+
}
|
|
20163
|
+
|
|
19986
20164
|
vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
|
|
19987
20165
|
if (path.empty()) {
|
|
19988
20166
|
return vector<string>();
|
|
@@ -20029,23 +20207,7 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
|
|
|
20029
20207
|
// Check if the path has a glob at all
|
|
20030
20208
|
if (!HasGlob(path)) {
|
|
20031
20209
|
// no glob: return only the file (if it exists or is a pipe)
|
|
20032
|
-
|
|
20033
|
-
if (FileExists(path) || IsPipe(path)) {
|
|
20034
|
-
result.push_back(path);
|
|
20035
|
-
} else if (!absolute_path) {
|
|
20036
|
-
Value value;
|
|
20037
|
-
if (opener->TryGetCurrentSetting("file_search_path", value)) {
|
|
20038
|
-
auto search_paths_str = value.ToString();
|
|
20039
|
-
std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
|
|
20040
|
-
for (const auto &search_path : search_paths) {
|
|
20041
|
-
auto joined_path = JoinPath(search_path, path);
|
|
20042
|
-
if (FileExists(joined_path) || IsPipe(joined_path)) {
|
|
20043
|
-
result.push_back(joined_path);
|
|
20044
|
-
}
|
|
20045
|
-
}
|
|
20046
|
-
}
|
|
20047
|
-
}
|
|
20048
|
-
return result;
|
|
20210
|
+
return FetchFileWithoutGlob(path, opener, absolute_path);
|
|
20049
20211
|
}
|
|
20050
20212
|
vector<string> previous_directories;
|
|
20051
20213
|
if (absolute_path) {
|
|
@@ -20079,7 +20241,12 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
|
|
|
20079
20241
|
}
|
|
20080
20242
|
}
|
|
20081
20243
|
}
|
|
20082
|
-
if (
|
|
20244
|
+
if (result.empty()) {
|
|
20245
|
+
// no result found that matches the glob
|
|
20246
|
+
// last ditch effort: search the path as a string literal
|
|
20247
|
+
return FetchFileWithoutGlob(path, opener, absolute_path);
|
|
20248
|
+
}
|
|
20249
|
+
if (is_last_chunk) {
|
|
20083
20250
|
return result;
|
|
20084
20251
|
}
|
|
20085
20252
|
previous_directories = move(result);
|
|
@@ -22428,14 +22595,16 @@ struct IntervalToStringCast {
|
|
|
22428
22595
|
if (micros < 0) {
|
|
22429
22596
|
// negative time: append negative sign
|
|
22430
22597
|
buffer[length++] = '-';
|
|
22598
|
+
} else {
|
|
22431
22599
|
micros = -micros;
|
|
22432
22600
|
}
|
|
22433
|
-
int64_t hour = micros / Interval::MICROS_PER_HOUR;
|
|
22434
|
-
micros
|
|
22435
|
-
int64_t min = micros / Interval::MICROS_PER_MINUTE;
|
|
22436
|
-
micros
|
|
22437
|
-
int64_t sec = micros / Interval::MICROS_PER_SEC;
|
|
22438
|
-
micros
|
|
22601
|
+
int64_t hour = -(micros / Interval::MICROS_PER_HOUR);
|
|
22602
|
+
micros += hour * Interval::MICROS_PER_HOUR;
|
|
22603
|
+
int64_t min = -(micros / Interval::MICROS_PER_MINUTE);
|
|
22604
|
+
micros += min * Interval::MICROS_PER_MINUTE;
|
|
22605
|
+
int64_t sec = -(micros / Interval::MICROS_PER_SEC);
|
|
22606
|
+
micros += sec * Interval::MICROS_PER_SEC;
|
|
22607
|
+
micros = -micros;
|
|
22439
22608
|
|
|
22440
22609
|
if (hour < 10) {
|
|
22441
22610
|
buffer[length++] = '0';
|
|
@@ -28558,7 +28727,7 @@ template <idx_t radix_bits>
|
|
|
28558
28727
|
struct RadixPartitioningConstants {
|
|
28559
28728
|
public:
|
|
28560
28729
|
static constexpr const idx_t NUM_RADIX_BITS = radix_bits;
|
|
28561
|
-
static constexpr const idx_t NUM_PARTITIONS = 1 << NUM_RADIX_BITS;
|
|
28730
|
+
static constexpr const idx_t NUM_PARTITIONS = (idx_t)1 << NUM_RADIX_BITS;
|
|
28562
28731
|
static constexpr const idx_t TMP_BUF_SIZE = 8;
|
|
28563
28732
|
|
|
28564
28733
|
public:
|
|
@@ -28576,7 +28745,7 @@ private:
|
|
|
28576
28745
|
struct RadixPartitioning {
|
|
28577
28746
|
public:
|
|
28578
28747
|
static idx_t NumberOfPartitions(idx_t radix_bits) {
|
|
28579
|
-
return 1 << radix_bits;
|
|
28748
|
+
return (idx_t)1 << radix_bits;
|
|
28580
28749
|
}
|
|
28581
28750
|
|
|
28582
28751
|
//! Partition the data in block_collection/string_heap to multiple partitions
|
|
@@ -33336,6 +33505,9 @@ void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr
|
|
|
33336
33505
|
|
|
33337
33506
|
static inline void VerifyUnswizzledString(const RowLayout &layout, const idx_t &col_idx, const data_ptr_t &row_ptr) {
|
|
33338
33507
|
#ifdef DEBUG
|
|
33508
|
+
if (layout.GetTypes()[col_idx] == LogicalTypeId::BLOB) {
|
|
33509
|
+
return;
|
|
33510
|
+
}
|
|
33339
33511
|
idx_t entry_idx;
|
|
33340
33512
|
idx_t idx_in_entry;
|
|
33341
33513
|
ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
|
|
@@ -35673,7 +35845,10 @@ struct SortConstants {
|
|
|
35673
35845
|
|
|
35674
35846
|
struct SortLayout {
|
|
35675
35847
|
public:
|
|
35848
|
+
SortLayout() {
|
|
35849
|
+
}
|
|
35676
35850
|
explicit SortLayout(const vector<BoundOrderByNode> &orders);
|
|
35851
|
+
SortLayout GetPrefixComparisonLayout(idx_t num_prefix_cols) const;
|
|
35677
35852
|
|
|
35678
35853
|
public:
|
|
35679
35854
|
idx_t column_count;
|
|
@@ -37324,6 +37499,32 @@ SortLayout::SortLayout(const vector<BoundOrderByNode> &orders)
|
|
|
37324
37499
|
blob_layout.Initialize(blob_layout_types);
|
|
37325
37500
|
}
|
|
37326
37501
|
|
|
37502
|
+
SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const {
|
|
37503
|
+
SortLayout result;
|
|
37504
|
+
result.column_count = num_prefix_cols;
|
|
37505
|
+
result.all_constant = true;
|
|
37506
|
+
result.comparison_size = 0;
|
|
37507
|
+
for (idx_t col_idx = 0; col_idx < num_prefix_cols; col_idx++) {
|
|
37508
|
+
result.order_types.push_back(order_types[col_idx]);
|
|
37509
|
+
result.order_by_null_types.push_back(order_by_null_types[col_idx]);
|
|
37510
|
+
result.logical_types.push_back(logical_types[col_idx]);
|
|
37511
|
+
|
|
37512
|
+
result.all_constant = result.all_constant && constant_size[col_idx];
|
|
37513
|
+
result.constant_size.push_back(constant_size[col_idx]);
|
|
37514
|
+
|
|
37515
|
+
result.comparison_size += column_sizes[col_idx];
|
|
37516
|
+
result.column_sizes.push_back(column_sizes[col_idx]);
|
|
37517
|
+
|
|
37518
|
+
result.prefix_lengths.push_back(prefix_lengths[col_idx]);
|
|
37519
|
+
result.stats.push_back(stats[col_idx]);
|
|
37520
|
+
result.has_null.push_back(has_null[col_idx]);
|
|
37521
|
+
}
|
|
37522
|
+
result.entry_size = entry_size;
|
|
37523
|
+
result.blob_layout = blob_layout;
|
|
37524
|
+
result.sorting_to_blob_col = sorting_to_blob_col;
|
|
37525
|
+
return result;
|
|
37526
|
+
}
|
|
37527
|
+
|
|
37327
37528
|
LocalSortState::LocalSortState() : initialized(false) {
|
|
37328
37529
|
}
|
|
37329
37530
|
|
|
@@ -39403,7 +39604,7 @@ public:
|
|
|
39403
39604
|
namespace duckdb {
|
|
39404
39605
|
|
|
39405
39606
|
enum class UnicodeType { INVALID, ASCII, UNICODE };
|
|
39406
|
-
enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE };
|
|
39607
|
+
enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE, INVALID_UNICODE };
|
|
39407
39608
|
|
|
39408
39609
|
class Utf8Proc {
|
|
39409
39610
|
public:
|
|
@@ -47573,11 +47774,36 @@ Value Value::CreateValue(dtime_t value) {
|
|
|
47573
47774
|
return Value::TIME(value);
|
|
47574
47775
|
}
|
|
47575
47776
|
|
|
47777
|
+
template <>
|
|
47778
|
+
Value Value::CreateValue(dtime_tz_t value) {
|
|
47779
|
+
return Value::TIMETZ(value);
|
|
47780
|
+
}
|
|
47781
|
+
|
|
47576
47782
|
template <>
|
|
47577
47783
|
Value Value::CreateValue(timestamp_t value) {
|
|
47578
47784
|
return Value::TIMESTAMP(value);
|
|
47579
47785
|
}
|
|
47580
47786
|
|
|
47787
|
+
template <>
|
|
47788
|
+
Value Value::CreateValue(timestamp_sec_t value) {
|
|
47789
|
+
return Value::TIMESTAMPSEC(value);
|
|
47790
|
+
}
|
|
47791
|
+
|
|
47792
|
+
template <>
|
|
47793
|
+
Value Value::CreateValue(timestamp_ms_t value) {
|
|
47794
|
+
return Value::TIMESTAMPMS(value);
|
|
47795
|
+
}
|
|
47796
|
+
|
|
47797
|
+
template <>
|
|
47798
|
+
Value Value::CreateValue(timestamp_ns_t value) {
|
|
47799
|
+
return Value::TIMESTAMPNS(value);
|
|
47800
|
+
}
|
|
47801
|
+
|
|
47802
|
+
template <>
|
|
47803
|
+
Value Value::CreateValue(timestamp_tz_t value) {
|
|
47804
|
+
return Value::TIMESTAMPTZ(value);
|
|
47805
|
+
}
|
|
47806
|
+
|
|
47581
47807
|
template <>
|
|
47582
47808
|
Value Value::CreateValue(const char *value) {
|
|
47583
47809
|
return Value(string(value));
|
|
@@ -49150,19 +49376,6 @@ void Vector::Resize(idx_t cur_size, idx_t new_size) {
|
|
|
49150
49376
|
}
|
|
49151
49377
|
}
|
|
49152
49378
|
|
|
49153
|
-
// FIXME Just like DECIMAL, it's important that type_info gets considered when determining whether or not to cast
|
|
49154
|
-
// just comparing internal type is not always enough
|
|
49155
|
-
static bool ValueShouldBeCast(const LogicalType &incoming, const LogicalType &target) {
|
|
49156
|
-
if (incoming.InternalType() != target.InternalType()) {
|
|
49157
|
-
return true;
|
|
49158
|
-
}
|
|
49159
|
-
if (incoming.id() == LogicalTypeId::DECIMAL && incoming.id() == target.id()) {
|
|
49160
|
-
//! Compare the type_info
|
|
49161
|
-
return incoming != target;
|
|
49162
|
-
}
|
|
49163
|
-
return false;
|
|
49164
|
-
}
|
|
49165
|
-
|
|
49166
49379
|
void Vector::SetValue(idx_t index, const Value &val) {
|
|
49167
49380
|
if (GetVectorType() == VectorType::DICTIONARY_VECTOR) {
|
|
49168
49381
|
// dictionary: apply dictionary and forward to child
|
|
@@ -49170,10 +49383,11 @@ void Vector::SetValue(idx_t index, const Value &val) {
|
|
|
49170
49383
|
auto &child = DictionaryVector::Child(*this);
|
|
49171
49384
|
return child.SetValue(sel_vector.get_index(index), val);
|
|
49172
49385
|
}
|
|
49173
|
-
if (
|
|
49386
|
+
if (val.type() != GetType()) {
|
|
49174
49387
|
SetValue(index, val.CastAs(GetType()));
|
|
49175
49388
|
return;
|
|
49176
49389
|
}
|
|
49390
|
+
D_ASSERT(val.type().InternalType() == GetType().InternalType());
|
|
49177
49391
|
|
|
49178
49392
|
validity.EnsureWritable();
|
|
49179
49393
|
validity.Set(index, !val.IsNull());
|
|
@@ -49424,7 +49638,10 @@ Value Vector::GetValue(const Vector &v_p, idx_t index_p) {
|
|
|
49424
49638
|
auto value = GetValueInternal(v_p, index_p);
|
|
49425
49639
|
// set the alias of the type to the correct value, if there is a type alias
|
|
49426
49640
|
if (v_p.GetType().HasAlias()) {
|
|
49427
|
-
value.type().
|
|
49641
|
+
value.type().CopyAuxInfo(v_p.GetType());
|
|
49642
|
+
}
|
|
49643
|
+
if (v_p.GetType().id() != LogicalTypeId::AGGREGATE_STATE && value.type().id() != LogicalTypeId::AGGREGATE_STATE) {
|
|
49644
|
+
D_ASSERT(v_p.GetType() == value.type());
|
|
49428
49645
|
}
|
|
49429
49646
|
return value;
|
|
49430
49647
|
}
|
|
@@ -50216,6 +50433,24 @@ void StringVector::AddHeapReference(Vector &vector, Vector &other) {
|
|
|
50216
50433
|
StringVector::AddBuffer(vector, other.auxiliary);
|
|
50217
50434
|
}
|
|
50218
50435
|
|
|
50436
|
+
Vector &MapVector::GetKeys(Vector &vector) {
|
|
50437
|
+
auto &entries = StructVector::GetEntries(vector);
|
|
50438
|
+
D_ASSERT(entries.size() == 2);
|
|
50439
|
+
return *entries[0];
|
|
50440
|
+
}
|
|
50441
|
+
Vector &MapVector::GetValues(Vector &vector) {
|
|
50442
|
+
auto &entries = StructVector::GetEntries(vector);
|
|
50443
|
+
D_ASSERT(entries.size() == 2);
|
|
50444
|
+
return *entries[1];
|
|
50445
|
+
}
|
|
50446
|
+
|
|
50447
|
+
const Vector &MapVector::GetKeys(const Vector &vector) {
|
|
50448
|
+
return GetKeys((Vector &)vector);
|
|
50449
|
+
}
|
|
50450
|
+
const Vector &MapVector::GetValues(const Vector &vector) {
|
|
50451
|
+
return GetValues((Vector &)vector);
|
|
50452
|
+
}
|
|
50453
|
+
|
|
50219
50454
|
vector<unique_ptr<Vector>> &StructVector::GetEntries(Vector &vector) {
|
|
50220
50455
|
D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT || vector.GetType().id() == LogicalTypeId::MAP);
|
|
50221
50456
|
if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
|
|
@@ -51491,6 +51726,7 @@ public:
|
|
|
51491
51726
|
if (!alias.empty()) {
|
|
51492
51727
|
return false;
|
|
51493
51728
|
}
|
|
51729
|
+
//! We only need to compare aliases when both types have them in this case
|
|
51494
51730
|
return true;
|
|
51495
51731
|
}
|
|
51496
51732
|
if (alias != other_p->alias) {
|
|
@@ -51504,8 +51740,7 @@ public:
|
|
|
51504
51740
|
if (type != other_p->type) {
|
|
51505
51741
|
return false;
|
|
51506
51742
|
}
|
|
51507
|
-
|
|
51508
|
-
return alias == other.alias && EqualsInternal(other_p);
|
|
51743
|
+
return alias == other_p->alias && EqualsInternal(other_p);
|
|
51509
51744
|
}
|
|
51510
51745
|
//! Serializes a ExtraTypeInfo to a stand-alone binary blob
|
|
51511
51746
|
virtual void Serialize(FieldWriter &writer) const {};
|
|
@@ -52184,10 +52419,7 @@ LogicalType LogicalType::Deserialize(Deserializer &source) {
|
|
|
52184
52419
|
return LogicalType(id, move(info));
|
|
52185
52420
|
}
|
|
52186
52421
|
|
|
52187
|
-
bool LogicalType::
|
|
52188
|
-
if (id_ != rhs.id_) {
|
|
52189
|
-
return false;
|
|
52190
|
-
}
|
|
52422
|
+
bool LogicalType::EqualTypeInfo(const LogicalType &rhs) const {
|
|
52191
52423
|
if (type_info_.get() == rhs.type_info_.get()) {
|
|
52192
52424
|
return true;
|
|
52193
52425
|
}
|
|
@@ -52199,6 +52431,13 @@ bool LogicalType::operator==(const LogicalType &rhs) const {
|
|
|
52199
52431
|
}
|
|
52200
52432
|
}
|
|
52201
52433
|
|
|
52434
|
+
bool LogicalType::operator==(const LogicalType &rhs) const {
|
|
52435
|
+
if (id_ != rhs.id_) {
|
|
52436
|
+
return false;
|
|
52437
|
+
}
|
|
52438
|
+
return EqualTypeInfo(rhs);
|
|
52439
|
+
}
|
|
52440
|
+
|
|
52202
52441
|
} // namespace duckdb
|
|
52203
52442
|
|
|
52204
52443
|
|
|
@@ -63069,6 +63308,16 @@ bool DistinctAggregateData::IsDistinct(idx_t index) const {
|
|
|
63069
63308
|
|
|
63070
63309
|
|
|
63071
63310
|
|
|
63311
|
+
//===----------------------------------------------------------------------===//
|
|
63312
|
+
// DuckDB
|
|
63313
|
+
//
|
|
63314
|
+
// duckdb/parallel/base_pipeline_event.hpp
|
|
63315
|
+
//
|
|
63316
|
+
//
|
|
63317
|
+
//===----------------------------------------------------------------------===//
|
|
63318
|
+
|
|
63319
|
+
|
|
63320
|
+
|
|
63072
63321
|
//===----------------------------------------------------------------------===//
|
|
63073
63322
|
// DuckDB
|
|
63074
63323
|
//
|
|
@@ -63142,6 +63391,22 @@ protected:
|
|
|
63142
63391
|
|
|
63143
63392
|
|
|
63144
63393
|
|
|
63394
|
+
namespace duckdb {
|
|
63395
|
+
|
|
63396
|
+
//! A BasePipelineEvent is used as the basis of any event that belongs to a specific pipeline
|
|
63397
|
+
class BasePipelineEvent : public Event {
|
|
63398
|
+
public:
|
|
63399
|
+
BasePipelineEvent(shared_ptr<Pipeline> pipeline);
|
|
63400
|
+
BasePipelineEvent(Pipeline &pipeline);
|
|
63401
|
+
|
|
63402
|
+
//! The pipeline that this event belongs to
|
|
63403
|
+
shared_ptr<Pipeline> pipeline;
|
|
63404
|
+
};
|
|
63405
|
+
|
|
63406
|
+
} // namespace duckdb
|
|
63407
|
+
|
|
63408
|
+
|
|
63409
|
+
|
|
63145
63410
|
namespace duckdb {
|
|
63146
63411
|
|
|
63147
63412
|
PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types,
|
|
@@ -63298,16 +63563,15 @@ void PhysicalHashAggregate::Combine(ExecutionContext &context, GlobalSinkState &
|
|
|
63298
63563
|
}
|
|
63299
63564
|
}
|
|
63300
63565
|
|
|
63301
|
-
class HashAggregateFinalizeEvent : public
|
|
63566
|
+
class HashAggregateFinalizeEvent : public BasePipelineEvent {
|
|
63302
63567
|
public:
|
|
63303
63568
|
HashAggregateFinalizeEvent(const PhysicalHashAggregate &op_p, HashAggregateGlobalState &gstate_p,
|
|
63304
63569
|
Pipeline *pipeline_p)
|
|
63305
|
-
:
|
|
63570
|
+
: BasePipelineEvent(*pipeline_p), op(op_p), gstate(gstate_p) {
|
|
63306
63571
|
}
|
|
63307
63572
|
|
|
63308
63573
|
const PhysicalHashAggregate &op;
|
|
63309
63574
|
HashAggregateGlobalState &gstate;
|
|
63310
|
-
Pipeline *pipeline;
|
|
63311
63575
|
|
|
63312
63576
|
public:
|
|
63313
63577
|
void Schedule() override {
|
|
@@ -64569,15 +64833,14 @@ private:
|
|
|
64569
64833
|
};
|
|
64570
64834
|
|
|
64571
64835
|
// TODO: Create tasks and run these in parallel instead of doing this all in Schedule, single threaded
|
|
64572
|
-
class DistinctAggregateFinalizeEvent : public
|
|
64836
|
+
class DistinctAggregateFinalizeEvent : public BasePipelineEvent {
|
|
64573
64837
|
public:
|
|
64574
64838
|
DistinctAggregateFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
|
|
64575
|
-
Pipeline
|
|
64576
|
-
:
|
|
64839
|
+
Pipeline &pipeline_p, ClientContext &context)
|
|
64840
|
+
: BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), context(context) {
|
|
64577
64841
|
}
|
|
64578
64842
|
const PhysicalUngroupedAggregate &op;
|
|
64579
64843
|
UngroupedAggregateGlobalState &gstate;
|
|
64580
|
-
Pipeline *pipeline;
|
|
64581
64844
|
ClientContext &context;
|
|
64582
64845
|
|
|
64583
64846
|
public:
|
|
@@ -64590,16 +64853,15 @@ public:
|
|
|
64590
64853
|
}
|
|
64591
64854
|
};
|
|
64592
64855
|
|
|
64593
|
-
class DistinctCombineFinalizeEvent : public
|
|
64856
|
+
class DistinctCombineFinalizeEvent : public BasePipelineEvent {
|
|
64594
64857
|
public:
|
|
64595
64858
|
DistinctCombineFinalizeEvent(const PhysicalUngroupedAggregate &op_p, UngroupedAggregateGlobalState &gstate_p,
|
|
64596
|
-
Pipeline
|
|
64597
|
-
:
|
|
64859
|
+
Pipeline &pipeline_p, ClientContext &client)
|
|
64860
|
+
: BasePipelineEvent(pipeline_p), op(op_p), gstate(gstate_p), client(client) {
|
|
64598
64861
|
}
|
|
64599
64862
|
|
|
64600
64863
|
const PhysicalUngroupedAggregate &op;
|
|
64601
64864
|
UngroupedAggregateGlobalState &gstate;
|
|
64602
|
-
Pipeline *pipeline;
|
|
64603
64865
|
ClientContext &client;
|
|
64604
64866
|
|
|
64605
64867
|
public:
|
|
@@ -64615,7 +64877,7 @@ public:
|
|
|
64615
64877
|
SetTasks(move(tasks));
|
|
64616
64878
|
|
|
64617
64879
|
//! Now that all tables are combined, it's time to do the distinct aggregations
|
|
64618
|
-
auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, pipeline, client);
|
|
64880
|
+
auto new_event = make_shared<DistinctAggregateFinalizeEvent>(op, gstate, *pipeline, client);
|
|
64619
64881
|
this->InsertEvent(move(new_event));
|
|
64620
64882
|
}
|
|
64621
64883
|
};
|
|
@@ -64644,12 +64906,12 @@ SinkFinalizeType PhysicalUngroupedAggregate::FinalizeDistinct(Pipeline &pipeline
|
|
|
64644
64906
|
}
|
|
64645
64907
|
}
|
|
64646
64908
|
if (any_partitioned) {
|
|
64647
|
-
auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate,
|
|
64909
|
+
auto new_event = make_shared<DistinctCombineFinalizeEvent>(*this, gstate, pipeline, context);
|
|
64648
64910
|
event.InsertEvent(move(new_event));
|
|
64649
64911
|
} else {
|
|
64650
64912
|
//! Hashtables aren't partitioned, they dont need to be joined first
|
|
64651
64913
|
//! So we can compute the aggregate already
|
|
64652
|
-
auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate,
|
|
64914
|
+
auto new_event = make_shared<DistinctAggregateFinalizeEvent>(*this, gstate, pipeline, context);
|
|
64653
64915
|
event.InsertEvent(move(new_event));
|
|
64654
64916
|
}
|
|
64655
64917
|
return SinkFinalizeType::READY;
|
|
@@ -64927,12 +65189,14 @@ public:
|
|
|
64927
65189
|
|
|
64928
65190
|
WindowGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
|
|
64929
65191
|
const Types &payload_types, idx_t max_mem, bool external)
|
|
64930
|
-
: memory_per_thread(max_mem), count(0)
|
|
65192
|
+
: memory_per_thread(max_mem), count(0) {
|
|
64931
65193
|
|
|
64932
65194
|
RowLayout payload_layout;
|
|
64933
65195
|
payload_layout.Initialize(payload_types);
|
|
64934
65196
|
global_sort = make_unique<GlobalSortState>(buffer_manager, orders, payload_layout);
|
|
64935
65197
|
global_sort->external = external;
|
|
65198
|
+
|
|
65199
|
+
partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
|
|
64936
65200
|
}
|
|
64937
65201
|
|
|
64938
65202
|
void Combine(LocalSortState &local_sort) {
|
|
@@ -66393,19 +66657,18 @@ private:
|
|
|
66393
66657
|
WindowGlobalHashGroup &hash_group;
|
|
66394
66658
|
};
|
|
66395
66659
|
|
|
66396
|
-
class WindowMergeEvent : public
|
|
66660
|
+
class WindowMergeEvent : public BasePipelineEvent {
|
|
66397
66661
|
public:
|
|
66398
66662
|
WindowMergeEvent(WindowGlobalSinkState &gstate_p, Pipeline &pipeline_p, WindowGlobalHashGroup &hash_group_p)
|
|
66399
|
-
:
|
|
66663
|
+
: BasePipelineEvent(pipeline_p), gstate(gstate_p), hash_group(hash_group_p) {
|
|
66400
66664
|
}
|
|
66401
66665
|
|
|
66402
66666
|
WindowGlobalSinkState &gstate;
|
|
66403
|
-
Pipeline &pipeline;
|
|
66404
66667
|
WindowGlobalHashGroup &hash_group;
|
|
66405
66668
|
|
|
66406
66669
|
public:
|
|
66407
66670
|
void Schedule() override {
|
|
66408
|
-
auto &context = pipeline
|
|
66671
|
+
auto &context = pipeline->GetClientContext();
|
|
66409
66672
|
|
|
66410
66673
|
// Schedule tasks equal to the number of threads, which will each merge multiple partitions
|
|
66411
66674
|
auto &ts = TaskScheduler::GetScheduler(context);
|
|
@@ -66420,7 +66683,7 @@ public:
|
|
|
66420
66683
|
|
|
66421
66684
|
void FinishEvent() override {
|
|
66422
66685
|
hash_group.global_sort->CompleteMergeRound(true);
|
|
66423
|
-
CreateMergeTasks(pipeline, *this, gstate, hash_group);
|
|
66686
|
+
CreateMergeTasks(*pipeline, *this, gstate, hash_group);
|
|
66424
66687
|
}
|
|
66425
66688
|
|
|
66426
66689
|
static void CreateMergeTasks(Pipeline &pipeline, Event &event, WindowGlobalSinkState &state,
|
|
@@ -67829,6 +68092,11 @@ public:
|
|
|
67829
68092
|
|
|
67830
68093
|
private:
|
|
67831
68094
|
static const vector<string> PathComponents();
|
|
68095
|
+
//! For tagged releases we use the tag, else we use the git commit hash
|
|
68096
|
+
static const string GetVersionDirectoryName();
|
|
68097
|
+
//! Version tags occur with and without 'v', tag in extension path is always with 'v'
|
|
68098
|
+
static const string NormalizeVersionTag(const string &version_tag);
|
|
68099
|
+
static bool IsRelease(const string &version_tag);
|
|
67832
68100
|
|
|
67833
68101
|
private:
|
|
67834
68102
|
static ExtensionLoadResult LoadExtensionInternal(DuckDB &db, const std::string &extension, bool initial_load);
|
|
@@ -70755,18 +71023,17 @@ private:
|
|
|
70755
71023
|
bool parallel;
|
|
70756
71024
|
};
|
|
70757
71025
|
|
|
70758
|
-
class HashJoinFinalizeEvent : public
|
|
71026
|
+
class HashJoinFinalizeEvent : public BasePipelineEvent {
|
|
70759
71027
|
public:
|
|
70760
71028
|
HashJoinFinalizeEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink)
|
|
70761
|
-
:
|
|
71029
|
+
: BasePipelineEvent(pipeline_p), sink(sink) {
|
|
70762
71030
|
}
|
|
70763
71031
|
|
|
70764
|
-
Pipeline &pipeline;
|
|
70765
71032
|
HashJoinGlobalSinkState &sink;
|
|
70766
71033
|
|
|
70767
71034
|
public:
|
|
70768
71035
|
void Schedule() override {
|
|
70769
|
-
auto &context = pipeline
|
|
71036
|
+
auto &context = pipeline->GetClientContext();
|
|
70770
71037
|
auto parallel_construct_count =
|
|
70771
71038
|
context.config.verify_parallelism ? STANDARD_VECTOR_SIZE : PARALLEL_CONSTRUCT_COUNT;
|
|
70772
71039
|
|
|
@@ -70833,20 +71100,19 @@ private:
|
|
|
70833
71100
|
JoinHashTable &local_ht;
|
|
70834
71101
|
};
|
|
70835
71102
|
|
|
70836
|
-
class HashJoinPartitionEvent : public
|
|
71103
|
+
class HashJoinPartitionEvent : public BasePipelineEvent {
|
|
70837
71104
|
public:
|
|
70838
71105
|
HashJoinPartitionEvent(Pipeline &pipeline_p, HashJoinGlobalSinkState &sink,
|
|
70839
71106
|
vector<unique_ptr<JoinHashTable>> &local_hts)
|
|
70840
|
-
:
|
|
71107
|
+
: BasePipelineEvent(pipeline_p), sink(sink), local_hts(local_hts) {
|
|
70841
71108
|
}
|
|
70842
71109
|
|
|
70843
|
-
Pipeline &pipeline;
|
|
70844
71110
|
HashJoinGlobalSinkState &sink;
|
|
70845
71111
|
vector<unique_ptr<JoinHashTable>> &local_hts;
|
|
70846
71112
|
|
|
70847
71113
|
public:
|
|
70848
71114
|
void Schedule() override {
|
|
70849
|
-
auto &context = pipeline
|
|
71115
|
+
auto &context = pipeline->GetClientContext();
|
|
70850
71116
|
vector<unique_ptr<Task>> partition_tasks;
|
|
70851
71117
|
partition_tasks.reserve(local_hts.size());
|
|
70852
71118
|
for (auto &local_ht : local_hts) {
|
|
@@ -70859,7 +71125,7 @@ public:
|
|
|
70859
71125
|
void FinishEvent() override {
|
|
70860
71126
|
local_hts.clear();
|
|
70861
71127
|
sink.hash_table->PrepareExternalFinalize();
|
|
70862
|
-
sink.ScheduleFinalize(pipeline, *this);
|
|
71128
|
+
sink.ScheduleFinalize(*pipeline, *this);
|
|
70863
71129
|
}
|
|
70864
71130
|
};
|
|
70865
71131
|
|
|
@@ -74563,21 +74829,20 @@ private:
|
|
|
74563
74829
|
GlobalSortedTable &table;
|
|
74564
74830
|
};
|
|
74565
74831
|
|
|
74566
|
-
class RangeJoinMergeEvent : public
|
|
74832
|
+
class RangeJoinMergeEvent : public BasePipelineEvent {
|
|
74567
74833
|
public:
|
|
74568
74834
|
using GlobalSortedTable = PhysicalRangeJoin::GlobalSortedTable;
|
|
74569
74835
|
|
|
74570
74836
|
public:
|
|
74571
74837
|
RangeJoinMergeEvent(GlobalSortedTable &table_p, Pipeline &pipeline_p)
|
|
74572
|
-
:
|
|
74838
|
+
: BasePipelineEvent(pipeline_p), table(table_p) {
|
|
74573
74839
|
}
|
|
74574
74840
|
|
|
74575
74841
|
GlobalSortedTable &table;
|
|
74576
|
-
Pipeline &pipeline;
|
|
74577
74842
|
|
|
74578
74843
|
public:
|
|
74579
74844
|
void Schedule() override {
|
|
74580
|
-
auto &context = pipeline
|
|
74845
|
+
auto &context = pipeline->GetClientContext();
|
|
74581
74846
|
|
|
74582
74847
|
// Schedule tasks equal to the number of threads, which will each merge multiple partitions
|
|
74583
74848
|
auto &ts = TaskScheduler::GetScheduler(context);
|
|
@@ -74596,7 +74861,7 @@ public:
|
|
|
74596
74861
|
global_sort_state.CompleteMergeRound(true);
|
|
74597
74862
|
if (global_sort_state.sorted_blocks.size() > 1) {
|
|
74598
74863
|
// Multiple blocks remaining: Schedule the next round
|
|
74599
|
-
table.ScheduleMergeTasks(pipeline, *this);
|
|
74864
|
+
table.ScheduleMergeTasks(*pipeline, *this);
|
|
74600
74865
|
}
|
|
74601
74866
|
}
|
|
74602
74867
|
};
|
|
@@ -74984,18 +75249,17 @@ private:
|
|
|
74984
75249
|
OrderGlobalState &state;
|
|
74985
75250
|
};
|
|
74986
75251
|
|
|
74987
|
-
class OrderMergeEvent : public
|
|
75252
|
+
class OrderMergeEvent : public BasePipelineEvent {
|
|
74988
75253
|
public:
|
|
74989
75254
|
OrderMergeEvent(OrderGlobalState &gstate_p, Pipeline &pipeline_p)
|
|
74990
|
-
:
|
|
75255
|
+
: BasePipelineEvent(pipeline_p), gstate(gstate_p) {
|
|
74991
75256
|
}
|
|
74992
75257
|
|
|
74993
75258
|
OrderGlobalState &gstate;
|
|
74994
|
-
Pipeline &pipeline;
|
|
74995
75259
|
|
|
74996
75260
|
public:
|
|
74997
75261
|
void Schedule() override {
|
|
74998
|
-
auto &context = pipeline
|
|
75262
|
+
auto &context = pipeline->GetClientContext();
|
|
74999
75263
|
|
|
75000
75264
|
// Schedule tasks equal to the number of threads, which will each merge multiple partitions
|
|
75001
75265
|
auto &ts = TaskScheduler::GetScheduler(context);
|
|
@@ -75014,7 +75278,7 @@ public:
|
|
|
75014
75278
|
global_sort_state.CompleteMergeRound();
|
|
75015
75279
|
if (global_sort_state.sorted_blocks.size() > 1) {
|
|
75016
75280
|
// Multiple blocks remaining: Schedule the next round
|
|
75017
|
-
PhysicalOrder::ScheduleMergeTasks(pipeline, *this, gstate);
|
|
75281
|
+
PhysicalOrder::ScheduleMergeTasks(*pipeline, *this, gstate);
|
|
75018
75282
|
}
|
|
75019
75283
|
}
|
|
75020
75284
|
};
|
|
@@ -79914,10 +80178,17 @@ void PhysicalCreateIndex::GetData(ExecutionContext &context, DataChunk &chunk, G
|
|
|
79914
80178
|
return;
|
|
79915
80179
|
}
|
|
79916
80180
|
|
|
80181
|
+
// convert virtual column ids to storage column ids
|
|
80182
|
+
vector<column_t> storage_ids;
|
|
80183
|
+
for (auto &column_id : column_ids) {
|
|
80184
|
+
D_ASSERT(column_id < table.columns.size());
|
|
80185
|
+
storage_ids.push_back(table.columns[column_id].StorageOid());
|
|
80186
|
+
}
|
|
80187
|
+
|
|
79917
80188
|
unique_ptr<Index> index;
|
|
79918
80189
|
switch (info->index_type) {
|
|
79919
80190
|
case IndexType::ART: {
|
|
79920
|
-
index = make_unique<ART>(
|
|
80191
|
+
index = make_unique<ART>(storage_ids, unbound_expressions, info->constraint_type, *context.client.db);
|
|
79921
80192
|
break;
|
|
79922
80193
|
}
|
|
79923
80194
|
default:
|
|
@@ -80222,11 +80493,10 @@ unique_ptr<GlobalSinkState> PhysicalCreateTableAs::GetGlobalSinkState(ClientCont
|
|
|
80222
80493
|
SinkResultType PhysicalCreateTableAs::Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p,
|
|
80223
80494
|
DataChunk &input) const {
|
|
80224
80495
|
auto &sink = (CreateTableAsGlobalState &)state;
|
|
80225
|
-
|
|
80226
|
-
|
|
80227
|
-
|
|
80228
|
-
|
|
80229
|
-
}
|
|
80496
|
+
D_ASSERT(sink.table);
|
|
80497
|
+
lock_guard<mutex> client_guard(sink.append_lock);
|
|
80498
|
+
sink.table->storage->Append(*sink.table, context.client, input);
|
|
80499
|
+
sink.inserted_count += input.size();
|
|
80230
80500
|
return SinkResultType::NEED_MORE_INPUT;
|
|
80231
80501
|
}
|
|
80232
80502
|
|
|
@@ -80636,6 +80906,7 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
|
|
|
80636
80906
|
void PhysicalRecursiveCTE::BuildPipelines(Executor &executor, Pipeline ¤t, PipelineBuildState &state) {
|
|
80637
80907
|
op_state.reset();
|
|
80638
80908
|
sink_state.reset();
|
|
80909
|
+
pipelines.clear();
|
|
80639
80910
|
|
|
80640
80911
|
// recursive CTE
|
|
80641
80912
|
state.SetPipelineSource(current, this);
|
|
@@ -80935,7 +81206,7 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(Allocator &allocator, Buffe
|
|
|
80935
81206
|
total_required_bits += group_bits;
|
|
80936
81207
|
}
|
|
80937
81208
|
// the total amount of groups we allocate space for is 2^required_bits
|
|
80938
|
-
total_groups = 1 << total_required_bits;
|
|
81209
|
+
total_groups = (uint64_t)1 << total_required_bits;
|
|
80939
81210
|
// we don't need to store the groups in a perfect hash table, since the group keys can be deduced by their location
|
|
80940
81211
|
grouping_columns = group_types_p.size();
|
|
80941
81212
|
layout.Initialize(move(aggregate_objects_p));
|
|
@@ -81119,7 +81390,7 @@ static void ReconstructGroupVectorTemplated(uint32_t group_values[], Value &min,
|
|
|
81119
81390
|
static void ReconstructGroupVector(uint32_t group_values[], Value &min, idx_t required_bits, idx_t shift,
|
|
81120
81391
|
idx_t entry_count, Vector &result) {
|
|
81121
81392
|
// construct the mask for this entry
|
|
81122
|
-
idx_t mask = (1 << required_bits) - 1;
|
|
81393
|
+
idx_t mask = ((uint64_t)1 << required_bits) - 1;
|
|
81123
81394
|
switch (result.GetType().InternalType()) {
|
|
81124
81395
|
case PhysicalType::INT8:
|
|
81125
81396
|
ReconstructGroupVectorTemplated<int8_t>(group_values, min, mask, shift, entry_count, result);
|
|
@@ -85366,7 +85637,7 @@ void RadixPartitionedHashTable::SetGroupingValues() {
|
|
|
85366
85637
|
for (idx_t i = 0; i < grouping.size(); i++) {
|
|
85367
85638
|
if (grouping_set.find(grouping[i]) == grouping_set.end()) {
|
|
85368
85639
|
// we don't group on this value!
|
|
85369
|
-
grouping_value += 1 << (grouping.size() - (i + 1));
|
|
85640
|
+
grouping_value += (int64_t)1 << (grouping.size() - (i + 1));
|
|
85370
85641
|
}
|
|
85371
85642
|
}
|
|
85372
85643
|
grouping_values.push_back(Value::BIGINT(grouping_value));
|
|
@@ -90924,7 +91195,21 @@ struct ModeIncluded {
|
|
|
90924
91195
|
const idx_t bias;
|
|
90925
91196
|
};
|
|
90926
91197
|
|
|
90927
|
-
|
|
91198
|
+
struct ModeAssignmentStandard {
|
|
91199
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
|
91200
|
+
static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
|
|
91201
|
+
return RESULT_TYPE(input);
|
|
91202
|
+
}
|
|
91203
|
+
};
|
|
91204
|
+
|
|
91205
|
+
struct ModeAssignmentString {
|
|
91206
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
|
91207
|
+
static RESULT_TYPE Assign(Vector &result, INPUT_TYPE input) {
|
|
91208
|
+
return StringVector::AddString(result, input);
|
|
91209
|
+
}
|
|
91210
|
+
};
|
|
91211
|
+
|
|
91212
|
+
template <typename KEY_TYPE, typename ASSIGN_OP>
|
|
90928
91213
|
struct ModeFunction {
|
|
90929
91214
|
template <class STATE>
|
|
90930
91215
|
static void Initialize(STATE *state) {
|
|
@@ -91037,7 +91322,7 @@ struct ModeFunction {
|
|
|
91037
91322
|
}
|
|
91038
91323
|
|
|
91039
91324
|
if (state->valid) {
|
|
91040
|
-
rdata[rid] = RESULT_TYPE(*state->mode);
|
|
91325
|
+
rdata[rid] = ASSIGN_OP::template Assign<INPUT_TYPE, RESULT_TYPE>(result, *state->mode);
|
|
91041
91326
|
} else {
|
|
91042
91327
|
rmask.Set(rid, false);
|
|
91043
91328
|
}
|
|
@@ -91053,10 +91338,10 @@ struct ModeFunction {
|
|
|
91053
91338
|
}
|
|
91054
91339
|
};
|
|
91055
91340
|
|
|
91056
|
-
template <typename INPUT_TYPE, typename KEY_TYPE>
|
|
91341
|
+
template <typename INPUT_TYPE, typename KEY_TYPE, typename ASSIGN_OP = ModeAssignmentStandard>
|
|
91057
91342
|
AggregateFunction GetTypedModeFunction(const LogicalType &type) {
|
|
91058
91343
|
using STATE = ModeState<KEY_TYPE>;
|
|
91059
|
-
using OP = ModeFunction<KEY_TYPE>;
|
|
91344
|
+
using OP = ModeFunction<KEY_TYPE, ASSIGN_OP>;
|
|
91060
91345
|
auto func = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, INPUT_TYPE, OP>(type, type);
|
|
91061
91346
|
func.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, INPUT_TYPE, OP>;
|
|
91062
91347
|
return func;
|
|
@@ -91092,7 +91377,7 @@ AggregateFunction GetModeAggregate(const LogicalType &type) {
|
|
|
91092
91377
|
return GetTypedModeFunction<interval_t, interval_t>(type);
|
|
91093
91378
|
|
|
91094
91379
|
case PhysicalType::VARCHAR:
|
|
91095
|
-
return GetTypedModeFunction<string_t, string>(type);
|
|
91380
|
+
return GetTypedModeFunction<string_t, string, ModeAssignmentString>(type);
|
|
91096
91381
|
|
|
91097
91382
|
default:
|
|
91098
91383
|
throw NotImplementedException("Unimplemented mode aggregate");
|
|
@@ -93281,21 +93566,21 @@ AggregateFunction GetHistogramFunction(const LogicalType &type) {
|
|
|
93281
93566
|
case LogicalType::VARCHAR:
|
|
93282
93567
|
return GetMapType<HistogramStringFunctor, string, IS_ORDERED>(type);
|
|
93283
93568
|
case LogicalType::TIMESTAMP:
|
|
93284
|
-
return GetMapType<HistogramFunctor,
|
|
93569
|
+
return GetMapType<HistogramFunctor, timestamp_t, IS_ORDERED>(type);
|
|
93285
93570
|
case LogicalType::TIMESTAMP_TZ:
|
|
93286
|
-
return GetMapType<HistogramFunctor,
|
|
93571
|
+
return GetMapType<HistogramFunctor, timestamp_tz_t, IS_ORDERED>(type);
|
|
93287
93572
|
case LogicalType::TIMESTAMP_S:
|
|
93288
|
-
return GetMapType<HistogramFunctor,
|
|
93573
|
+
return GetMapType<HistogramFunctor, timestamp_sec_t, IS_ORDERED>(type);
|
|
93289
93574
|
case LogicalType::TIMESTAMP_MS:
|
|
93290
|
-
return GetMapType<HistogramFunctor,
|
|
93575
|
+
return GetMapType<HistogramFunctor, timestamp_ms_t, IS_ORDERED>(type);
|
|
93291
93576
|
case LogicalType::TIMESTAMP_NS:
|
|
93292
|
-
return GetMapType<HistogramFunctor,
|
|
93577
|
+
return GetMapType<HistogramFunctor, timestamp_ns_t, IS_ORDERED>(type);
|
|
93293
93578
|
case LogicalType::TIME:
|
|
93294
|
-
return GetMapType<HistogramFunctor,
|
|
93579
|
+
return GetMapType<HistogramFunctor, dtime_t, IS_ORDERED>(type);
|
|
93295
93580
|
case LogicalType::TIME_TZ:
|
|
93296
|
-
return GetMapType<HistogramFunctor,
|
|
93581
|
+
return GetMapType<HistogramFunctor, dtime_tz_t, IS_ORDERED>(type);
|
|
93297
93582
|
case LogicalType::DATE:
|
|
93298
|
-
return GetMapType<HistogramFunctor,
|
|
93583
|
+
return GetMapType<HistogramFunctor, date_t, IS_ORDERED>(type);
|
|
93299
93584
|
default:
|
|
93300
93585
|
throw InternalException("Unimplemented histogram aggregate");
|
|
93301
93586
|
}
|
|
@@ -96859,7 +97144,8 @@ struct DateDiff {
|
|
|
96859
97144
|
struct WeekOperator {
|
|
96860
97145
|
template <class TA, class TB, class TR>
|
|
96861
97146
|
static inline TR Operation(TA startdate, TB enddate) {
|
|
96862
|
-
return Date::Epoch(enddate) / Interval::SECS_PER_WEEK -
|
|
97147
|
+
return Date::Epoch(Date::GetMondayOfCurrentWeek(enddate)) / Interval::SECS_PER_WEEK -
|
|
97148
|
+
Date::Epoch(Date::GetMondayOfCurrentWeek(startdate)) / Interval::SECS_PER_WEEK;
|
|
96863
97149
|
}
|
|
96864
97150
|
};
|
|
96865
97151
|
|
|
@@ -103243,12 +103529,49 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
|
|
|
103243
103529
|
result, state_vector.state_vector, count);
|
|
103244
103530
|
break;
|
|
103245
103531
|
case PhysicalType::INT32:
|
|
103246
|
-
|
|
103247
|
-
|
|
103532
|
+
if (key_type.id() == LogicalTypeId::DATE) {
|
|
103533
|
+
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, date_t>(
|
|
103534
|
+
result, state_vector.state_vector, count);
|
|
103535
|
+
} else {
|
|
103536
|
+
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
|
|
103537
|
+
result, state_vector.state_vector, count);
|
|
103538
|
+
}
|
|
103248
103539
|
break;
|
|
103249
103540
|
case PhysicalType::INT64:
|
|
103250
|
-
|
|
103251
|
-
|
|
103541
|
+
switch (key_type.id()) {
|
|
103542
|
+
case LogicalTypeId::TIME:
|
|
103543
|
+
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_t>(
|
|
103544
|
+
result, state_vector.state_vector, count);
|
|
103545
|
+
break;
|
|
103546
|
+
case LogicalTypeId::TIME_TZ:
|
|
103547
|
+
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_tz_t>(
|
|
103548
|
+
result, state_vector.state_vector, count);
|
|
103549
|
+
break;
|
|
103550
|
+
case LogicalTypeId::TIMESTAMP:
|
|
103551
|
+
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_t>(
|
|
103552
|
+
result, state_vector.state_vector, count);
|
|
103553
|
+
break;
|
|
103554
|
+
case LogicalTypeId::TIMESTAMP_MS:
|
|
103555
|
+
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ms_t>(
|
|
103556
|
+
result, state_vector.state_vector, count);
|
|
103557
|
+
break;
|
|
103558
|
+
case LogicalTypeId::TIMESTAMP_NS:
|
|
103559
|
+
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ns_t>(
|
|
103560
|
+
result, state_vector.state_vector, count);
|
|
103561
|
+
break;
|
|
103562
|
+
case LogicalTypeId::TIMESTAMP_SEC:
|
|
103563
|
+
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_sec_t>(
|
|
103564
|
+
result, state_vector.state_vector, count);
|
|
103565
|
+
break;
|
|
103566
|
+
case LogicalTypeId::TIMESTAMP_TZ:
|
|
103567
|
+
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_tz_t>(
|
|
103568
|
+
result, state_vector.state_vector, count);
|
|
103569
|
+
break;
|
|
103570
|
+
default:
|
|
103571
|
+
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
|
|
103572
|
+
result, state_vector.state_vector, count);
|
|
103573
|
+
break;
|
|
103574
|
+
}
|
|
103252
103575
|
break;
|
|
103253
103576
|
case PhysicalType::FLOAT:
|
|
103254
103577
|
FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, float>(
|
|
@@ -104318,18 +104641,12 @@ void SinkDataChunk(Vector *child_vector, SelectionVector &sel, idx_t offset_list
|
|
|
104318
104641
|
static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
104319
104642
|
D_ASSERT(args.ColumnCount() >= 1 && args.ColumnCount() <= 3);
|
|
104320
104643
|
auto count = args.size();
|
|
104321
|
-
Vector &
|
|
104644
|
+
Vector &input_lists = args.data[0];
|
|
104322
104645
|
|
|
104323
104646
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
104324
104647
|
auto &result_validity = FlatVector::Validity(result);
|
|
104325
104648
|
|
|
104326
|
-
|
|
104327
|
-
if (v.GetVectorType() != VectorType::FLAT_VECTOR && v.GetVectorType() != VectorType::CONSTANT_VECTOR) {
|
|
104328
|
-
v.Flatten(count);
|
|
104329
|
-
}
|
|
104330
|
-
}
|
|
104331
|
-
|
|
104332
|
-
if (lists.GetType().id() == LogicalTypeId::SQLNULL) {
|
|
104649
|
+
if (input_lists.GetType().id() == LogicalTypeId::SQLNULL) {
|
|
104333
104650
|
result_validity.SetInvalid(0);
|
|
104334
104651
|
return;
|
|
104335
104652
|
}
|
|
@@ -104344,15 +104661,18 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
|
|
|
104344
104661
|
LocalSortState local_sort_state;
|
|
104345
104662
|
local_sort_state.Initialize(global_sort_state, buffer_manager);
|
|
104346
104663
|
|
|
104664
|
+
// this ensures that we do not change the order of the entries in the input chunk
|
|
104665
|
+
VectorOperations::Copy(input_lists, result, count, 0, 0);
|
|
104666
|
+
|
|
104347
104667
|
// get the child vector
|
|
104348
|
-
auto lists_size = ListVector::GetListSize(
|
|
104349
|
-
auto &child_vector = ListVector::GetEntry(
|
|
104668
|
+
auto lists_size = ListVector::GetListSize(result);
|
|
104669
|
+
auto &child_vector = ListVector::GetEntry(result);
|
|
104350
104670
|
UnifiedVectorFormat child_data;
|
|
104351
104671
|
child_vector.ToUnifiedFormat(lists_size, child_data);
|
|
104352
104672
|
|
|
104353
104673
|
// get the lists data
|
|
104354
104674
|
UnifiedVectorFormat lists_data;
|
|
104355
|
-
|
|
104675
|
+
result.ToUnifiedFormat(count, lists_data);
|
|
104356
104676
|
auto list_entries = (list_entry_t *)lists_data.data;
|
|
104357
104677
|
|
|
104358
104678
|
// create the lists_indices vector, this contains an element for each list's entry,
|
|
@@ -104449,8 +104769,6 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
|
|
|
104449
104769
|
child_vector.Flatten(sel_sorted_idx);
|
|
104450
104770
|
}
|
|
104451
104771
|
|
|
104452
|
-
result.Reference(lists);
|
|
104453
|
-
|
|
104454
104772
|
if (args.AllConstant()) {
|
|
104455
104773
|
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
104456
104774
|
}
|
|
@@ -105224,16 +105542,21 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
|
|
|
105224
105542
|
auto &map = args.data[0];
|
|
105225
105543
|
auto &key = args.data[1];
|
|
105226
105544
|
|
|
105227
|
-
UnifiedVectorFormat
|
|
105545
|
+
UnifiedVectorFormat map_keys_data;
|
|
105546
|
+
UnifiedVectorFormat key_data;
|
|
105228
105547
|
|
|
105229
|
-
auto &
|
|
105548
|
+
auto &map_keys = MapVector::GetKeys(map);
|
|
105549
|
+
auto &map_values = MapVector::GetValues(map);
|
|
105550
|
+
|
|
105551
|
+
map_keys.ToUnifiedFormat(args.size(), map_keys_data);
|
|
105552
|
+
key.ToUnifiedFormat(args.size(), key_data);
|
|
105230
105553
|
|
|
105231
|
-
children[0]->ToUnifiedFormat(args.size(), offset_data);
|
|
105232
105554
|
for (idx_t row = 0; row < args.size(); row++) {
|
|
105233
|
-
idx_t row_index =
|
|
105234
|
-
|
|
105235
|
-
auto
|
|
105236
|
-
auto
|
|
105555
|
+
idx_t row_index = map_keys_data.sel->get_index(row);
|
|
105556
|
+
idx_t key_index = key_data.sel->get_index(row);
|
|
105557
|
+
auto key_value = key.GetValue(key_index);
|
|
105558
|
+
auto offsets = ListVector::Search(map_keys, key_value, row_index);
|
|
105559
|
+
auto values = ListVector::GetValuesFromOffsets(map_values, offsets);
|
|
105237
105560
|
FillResult(values, result, row);
|
|
105238
105561
|
}
|
|
105239
105562
|
|
|
@@ -108128,6 +108451,24 @@ interval_t DivideOperator::Operation(interval_t left, int64_t right) {
|
|
|
108128
108451
|
return left;
|
|
108129
108452
|
}
|
|
108130
108453
|
|
|
108454
|
+
struct BinaryNumericDivideWrapper {
|
|
108455
|
+
template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
|
|
108456
|
+
static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
|
|
108457
|
+
if (left == NumericLimits<LEFT_TYPE>::Minimum() && right == -1) {
|
|
108458
|
+
throw OutOfRangeException("Overflow in division of %d / %d", left, right);
|
|
108459
|
+
} else if (right == 0) {
|
|
108460
|
+
mask.SetInvalid(idx);
|
|
108461
|
+
return left;
|
|
108462
|
+
} else {
|
|
108463
|
+
return OP::template Operation<LEFT_TYPE, RIGHT_TYPE, RESULT_TYPE>(left, right);
|
|
108464
|
+
}
|
|
108465
|
+
}
|
|
108466
|
+
|
|
108467
|
+
static bool AddsNulls() {
|
|
108468
|
+
return true;
|
|
108469
|
+
}
|
|
108470
|
+
};
|
|
108471
|
+
|
|
108131
108472
|
struct BinaryZeroIsNullWrapper {
|
|
108132
108473
|
template <class FUNC, class OP, class LEFT_TYPE, class RIGHT_TYPE, class RESULT_TYPE>
|
|
108133
108474
|
static inline RESULT_TYPE Operation(FUNC fun, LEFT_TYPE left, RIGHT_TYPE right, ValidityMask &mask, idx_t idx) {
|
|
@@ -108169,13 +108510,13 @@ template <class OP>
|
|
|
108169
108510
|
static scalar_function_t GetBinaryFunctionIgnoreZero(const LogicalType &type) {
|
|
108170
108511
|
switch (type.id()) {
|
|
108171
108512
|
case LogicalTypeId::TINYINT:
|
|
108172
|
-
return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP>;
|
|
108513
|
+
return BinaryScalarFunctionIgnoreZero<int8_t, int8_t, int8_t, OP, BinaryNumericDivideWrapper>;
|
|
108173
108514
|
case LogicalTypeId::SMALLINT:
|
|
108174
|
-
return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP>;
|
|
108515
|
+
return BinaryScalarFunctionIgnoreZero<int16_t, int16_t, int16_t, OP, BinaryNumericDivideWrapper>;
|
|
108175
108516
|
case LogicalTypeId::INTEGER:
|
|
108176
|
-
return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP>;
|
|
108517
|
+
return BinaryScalarFunctionIgnoreZero<int32_t, int32_t, int32_t, OP, BinaryNumericDivideWrapper>;
|
|
108177
108518
|
case LogicalTypeId::BIGINT:
|
|
108178
|
-
return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP>;
|
|
108519
|
+
return BinaryScalarFunctionIgnoreZero<int64_t, int64_t, int64_t, OP, BinaryNumericDivideWrapper>;
|
|
108179
108520
|
case LogicalTypeId::UTINYINT:
|
|
108180
108521
|
return BinaryScalarFunctionIgnoreZero<uint8_t, uint8_t, uint8_t, OP>;
|
|
108181
108522
|
case LogicalTypeId::USMALLINT:
|
|
@@ -114623,11 +114964,22 @@ static void CurrentSchemaFunction(DataChunk &input, ExpressionState &state, Vect
|
|
|
114623
114964
|
|
|
114624
114965
|
// current_schemas
|
|
114625
114966
|
static void CurrentSchemasFunction(DataChunk &input, ExpressionState &state, Vector &result) {
|
|
114967
|
+
if (!input.AllConstant()) {
|
|
114968
|
+
throw NotImplementedException("current_schemas requires a constant input");
|
|
114969
|
+
}
|
|
114970
|
+
if (ConstantVector::IsNull(input.data[0])) {
|
|
114971
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
114972
|
+
ConstantVector::SetNull(result, true);
|
|
114973
|
+
return;
|
|
114974
|
+
}
|
|
114975
|
+
auto implicit_schemas = *ConstantVector::GetData<bool>(input.data[0]);
|
|
114626
114976
|
vector<Value> schema_list;
|
|
114627
|
-
|
|
114977
|
+
auto &catalog_search_path = ClientData::Get(SystemBindData::GetFrom(state).context).catalog_search_path;
|
|
114978
|
+
vector<string> search_path = implicit_schemas ? catalog_search_path->Get() : catalog_search_path->GetSetPaths();
|
|
114628
114979
|
std::transform(search_path.begin(), search_path.end(), std::back_inserter(schema_list),
|
|
114629
114980
|
[](const string &s) -> Value { return Value(s); });
|
|
114630
|
-
|
|
114981
|
+
|
|
114982
|
+
auto val = Value::LIST(LogicalType::VARCHAR, schema_list);
|
|
114631
114983
|
result.Reference(val);
|
|
114632
114984
|
}
|
|
114633
114985
|
|
|
@@ -114926,8 +115278,8 @@ struct ArrowScanLocalState : public LocalTableFunctionState {
|
|
|
114926
115278
|
struct ArrowScanGlobalState : public GlobalTableFunctionState {
|
|
114927
115279
|
unique_ptr<ArrowArrayStreamWrapper> stream;
|
|
114928
115280
|
mutex main_mutex;
|
|
114929
|
-
bool ready = false;
|
|
114930
115281
|
idx_t max_threads = 1;
|
|
115282
|
+
bool done = false;
|
|
114931
115283
|
|
|
114932
115284
|
idx_t MaxThreads() const override {
|
|
114933
115285
|
return max_threads;
|
|
@@ -115215,6 +115567,9 @@ idx_t ArrowTableFunction::ArrowScanMaxThreads(ClientContext &context, const Func
|
|
|
115215
115567
|
bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind_data_p, ArrowScanLocalState &state,
|
|
115216
115568
|
ArrowScanGlobalState ¶llel_state) {
|
|
115217
115569
|
lock_guard<mutex> parallel_lock(parallel_state.main_mutex);
|
|
115570
|
+
if (parallel_state.done) {
|
|
115571
|
+
return false;
|
|
115572
|
+
}
|
|
115218
115573
|
state.chunk_offset = 0;
|
|
115219
115574
|
|
|
115220
115575
|
auto current_chunk = parallel_state.stream->GetNextChunk();
|
|
@@ -115224,6 +115579,7 @@ bool ArrowScanParallelStateNext(ClientContext &context, const FunctionData *bind
|
|
|
115224
115579
|
state.chunk = move(current_chunk);
|
|
115225
115580
|
//! have we run out of chunks? we are done
|
|
115226
115581
|
if (!state.chunk->arrow_array.release) {
|
|
115582
|
+
parallel_state.done = true;
|
|
115227
115583
|
return false;
|
|
115228
115584
|
}
|
|
115229
115585
|
return true;
|
|
@@ -117625,6 +117981,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
|
|
|
117625
117981
|
table_function.named_parameters["skip"] = LogicalType::BIGINT;
|
|
117626
117982
|
table_function.named_parameters["max_line_size"] = LogicalType::VARCHAR;
|
|
117627
117983
|
table_function.named_parameters["maximum_line_size"] = LogicalType::VARCHAR;
|
|
117984
|
+
table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN;
|
|
117628
117985
|
}
|
|
117629
117986
|
|
|
117630
117987
|
double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
|
|
@@ -121455,8 +121812,7 @@ static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, co
|
|
|
121455
121812
|
// we don't emit any statistics for tables that have outstanding transaction-local data
|
|
121456
121813
|
return nullptr;
|
|
121457
121814
|
}
|
|
121458
|
-
|
|
121459
|
-
return bind_data.table->storage->GetStatistics(context, storage_idx);
|
|
121815
|
+
return bind_data.table->GetStatistics(context, column_id);
|
|
121460
121816
|
}
|
|
121461
121817
|
|
|
121462
121818
|
static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
|
|
@@ -123028,7 +123384,7 @@ bool duckdb_validity_row_is_valid(uint64_t *validity, idx_t row) {
|
|
|
123028
123384
|
}
|
|
123029
123385
|
idx_t entry_idx = row / 64;
|
|
123030
123386
|
idx_t idx_in_entry = row % 64;
|
|
123031
|
-
return validity[entry_idx] & (1 << idx_in_entry);
|
|
123387
|
+
return validity[entry_idx] & ((idx_t)1 << idx_in_entry);
|
|
123032
123388
|
}
|
|
123033
123389
|
|
|
123034
123390
|
void duckdb_validity_set_row_validity(uint64_t *validity, idx_t row, bool valid) {
|
|
@@ -123045,7 +123401,7 @@ void duckdb_validity_set_row_invalid(uint64_t *validity, idx_t row) {
|
|
|
123045
123401
|
}
|
|
123046
123402
|
idx_t entry_idx = row / 64;
|
|
123047
123403
|
idx_t idx_in_entry = row % 64;
|
|
123048
|
-
validity[entry_idx] &= ~(1 << idx_in_entry);
|
|
123404
|
+
validity[entry_idx] &= ~((uint64_t)1 << idx_in_entry);
|
|
123049
123405
|
}
|
|
123050
123406
|
|
|
123051
123407
|
void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
|
|
@@ -123054,7 +123410,7 @@ void duckdb_validity_set_row_valid(uint64_t *validity, idx_t row) {
|
|
|
123054
123410
|
}
|
|
123055
123411
|
idx_t entry_idx = row / 64;
|
|
123056
123412
|
idx_t idx_in_entry = row % 64;
|
|
123057
|
-
validity[entry_idx] |= 1 << idx_in_entry;
|
|
123413
|
+
validity[entry_idx] |= (uint64_t)1 << idx_in_entry;
|
|
123058
123414
|
}
|
|
123059
123415
|
|
|
123060
123416
|
|
|
@@ -126237,6 +126593,11 @@ PendingExecutionResult ClientContext::ExecuteTaskInternal(ClientContextLock &loc
|
|
|
126237
126593
|
query_progress = active_query->progress_bar->GetCurrentPercentage();
|
|
126238
126594
|
}
|
|
126239
126595
|
return result;
|
|
126596
|
+
} catch (FatalException &ex) {
|
|
126597
|
+
// fatal exceptions invalidate the entire database
|
|
126598
|
+
result.SetError(PreservedError(ex));
|
|
126599
|
+
auto &db = DatabaseInstance::GetDatabase(*this);
|
|
126600
|
+
db.Invalidate();
|
|
126240
126601
|
} catch (const Exception &ex) {
|
|
126241
126602
|
result.SetError(PreservedError(ex));
|
|
126242
126603
|
} catch (std::exception &ex) {
|
|
@@ -126456,9 +126817,19 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
|
|
|
126456
126817
|
case StatementType::INSERT_STATEMENT:
|
|
126457
126818
|
case StatementType::DELETE_STATEMENT:
|
|
126458
126819
|
case StatementType::UPDATE_STATEMENT: {
|
|
126459
|
-
auto sql = statement->ToString();
|
|
126460
126820
|
Parser parser;
|
|
126461
|
-
|
|
126821
|
+
PreservedError error;
|
|
126822
|
+
try {
|
|
126823
|
+
parser.ParseQuery(statement->ToString());
|
|
126824
|
+
} catch (const Exception &ex) {
|
|
126825
|
+
error = PreservedError(ex);
|
|
126826
|
+
} catch (std::exception &ex) {
|
|
126827
|
+
error = PreservedError(ex);
|
|
126828
|
+
}
|
|
126829
|
+
if (error) {
|
|
126830
|
+
// error in verifying query
|
|
126831
|
+
return make_unique<PendingQueryResult>(error);
|
|
126832
|
+
}
|
|
126462
126833
|
statement = move(parser.statements[0]);
|
|
126463
126834
|
break;
|
|
126464
126835
|
}
|
|
@@ -137106,8 +137477,27 @@ namespace duckdb {
|
|
|
137106
137477
|
//===--------------------------------------------------------------------===//
|
|
137107
137478
|
// Install Extension
|
|
137108
137479
|
//===--------------------------------------------------------------------===//
|
|
137480
|
+
const string ExtensionHelper::NormalizeVersionTag(const string &version_tag) {
|
|
137481
|
+
if (version_tag.length() > 0 && version_tag[0] != 'v') {
|
|
137482
|
+
return "v" + version_tag;
|
|
137483
|
+
}
|
|
137484
|
+
return version_tag;
|
|
137485
|
+
}
|
|
137486
|
+
|
|
137487
|
+
bool ExtensionHelper::IsRelease(const string &version_tag) {
|
|
137488
|
+
return !StringUtil::Contains(version_tag, "-dev");
|
|
137489
|
+
}
|
|
137490
|
+
|
|
137491
|
+
const string ExtensionHelper::GetVersionDirectoryName() {
|
|
137492
|
+
if (IsRelease(DuckDB::LibraryVersion())) {
|
|
137493
|
+
return NormalizeVersionTag(DuckDB::LibraryVersion());
|
|
137494
|
+
} else {
|
|
137495
|
+
return DuckDB::SourceID();
|
|
137496
|
+
}
|
|
137497
|
+
}
|
|
137498
|
+
|
|
137109
137499
|
const vector<string> ExtensionHelper::PathComponents() {
|
|
137110
|
-
return vector<string> {".duckdb", "extensions",
|
|
137500
|
+
return vector<string> {".duckdb", "extensions", GetVersionDirectoryName(), DuckDB::Platform()};
|
|
137111
137501
|
}
|
|
137112
137502
|
|
|
137113
137503
|
string ExtensionHelper::ExtensionDirectory(ClientContext &context) {
|
|
@@ -137180,7 +137570,7 @@ void ExtensionHelper::InstallExtension(ClientContext &context, const string &ext
|
|
|
137180
137570
|
extension_name = "";
|
|
137181
137571
|
}
|
|
137182
137572
|
|
|
137183
|
-
auto url = StringUtil::Replace(url_template, "${REVISION}",
|
|
137573
|
+
auto url = StringUtil::Replace(url_template, "${REVISION}", GetVersionDirectoryName());
|
|
137184
137574
|
url = StringUtil::Replace(url, "${PLATFORM}", DuckDB::Platform());
|
|
137185
137575
|
url = StringUtil::Replace(url, "${NAME}", extension_name);
|
|
137186
137576
|
|
|
@@ -141402,7 +141792,7 @@ Value ForceCompressionSetting::GetSetting(ClientContext &context) {
|
|
|
141402
141792
|
//===--------------------------------------------------------------------===//
|
|
141403
141793
|
void HomeDirectorySetting::SetLocal(ClientContext &context, const Value &input) {
|
|
141404
141794
|
auto &config = ClientConfig::GetConfig(context);
|
|
141405
|
-
config.home_directory = input.IsNull() ?
|
|
141795
|
+
config.home_directory = input.IsNull() ? string() : input.ToString();
|
|
141406
141796
|
}
|
|
141407
141797
|
|
|
141408
141798
|
Value HomeDirectorySetting::GetSetting(ClientContext &context) {
|
|
@@ -142358,9 +142748,7 @@ void CardinalityEstimator::UpdateTotalDomains(JoinNode *node, LogicalOperator *o
|
|
|
142358
142748
|
// Get HLL stats here
|
|
142359
142749
|
auto actual_binding = relation_column_to_original_column[key];
|
|
142360
142750
|
|
|
142361
|
-
|
|
142362
|
-
// there is still a catalog table. Anybody know anything about this?
|
|
142363
|
-
auto base_stats = catalog_table->storage->GetStatistics(context, actual_binding.column_index);
|
|
142751
|
+
auto base_stats = catalog_table->GetStatistics(context, actual_binding.column_index);
|
|
142364
142752
|
if (base_stats) {
|
|
142365
142753
|
count = base_stats->GetDistinctCount();
|
|
142366
142754
|
}
|
|
@@ -143056,6 +143444,7 @@ private:
|
|
|
143056
143444
|
|
|
143057
143445
|
|
|
143058
143446
|
|
|
143447
|
+
|
|
143059
143448
|
namespace duckdb {
|
|
143060
143449
|
|
|
143061
143450
|
class DeliminatorPlanUpdater : LogicalOperatorVisitor {
|
|
@@ -143083,7 +143472,15 @@ void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
|
|
|
143083
143472
|
cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
|
|
143084
143473
|
continue;
|
|
143085
143474
|
}
|
|
143086
|
-
|
|
143475
|
+
Expression *rhs = cond.right.get();
|
|
143476
|
+
while (rhs->type == ExpressionType::OPERATOR_CAST) {
|
|
143477
|
+
auto &cast = (BoundCastExpression &)*rhs;
|
|
143478
|
+
rhs = cast.child.get();
|
|
143479
|
+
}
|
|
143480
|
+
if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
|
|
143481
|
+
throw InternalException("Erorr in deliminator: expected a bound column reference");
|
|
143482
|
+
}
|
|
143483
|
+
auto &colref = (BoundColumnRefExpression &)*rhs;
|
|
143087
143484
|
if (projection_map.find(colref.binding) != projection_map.end()) {
|
|
143088
143485
|
// value on the right is a projection of removed DelimGet
|
|
143089
143486
|
for (idx_t i = 0; i < decs->size(); i++) {
|
|
@@ -144231,7 +144628,10 @@ FilterResult FilterCombiner::AddBoundComparisonFilter(Expression *expr) {
|
|
|
144231
144628
|
auto node = GetNode(left_is_scalar ? comparison.right.get() : comparison.left.get());
|
|
144232
144629
|
idx_t equivalence_set = GetEquivalenceSet(node);
|
|
144233
144630
|
auto scalar = left_is_scalar ? comparison.left.get() : comparison.right.get();
|
|
144234
|
-
|
|
144631
|
+
Value constant_value;
|
|
144632
|
+
if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
|
|
144633
|
+
return FilterResult::UNSATISFIABLE;
|
|
144634
|
+
}
|
|
144235
144635
|
if (constant_value.IsNull()) {
|
|
144236
144636
|
// comparisons with null are always null (i.e. will never result in rows)
|
|
144237
144637
|
return FilterResult::UNSATISFIABLE;
|
|
@@ -144312,7 +144712,11 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
|
|
|
144312
144712
|
}
|
|
144313
144713
|
if (expr->IsFoldable()) {
|
|
144314
144714
|
// scalar condition, evaluate it
|
|
144315
|
-
|
|
144715
|
+
Value result;
|
|
144716
|
+
if (!ExpressionExecutor::TryEvaluateScalar(*expr, result)) {
|
|
144717
|
+
return FilterResult::UNSUPPORTED;
|
|
144718
|
+
}
|
|
144719
|
+
result = result.CastAs(LogicalType::BOOLEAN);
|
|
144316
144720
|
// check if the filter passes
|
|
144317
144721
|
if (result.IsNull() || !BooleanValue::Get(result)) {
|
|
144318
144722
|
// the filter does not pass the scalar test, create an empty result
|
|
@@ -144336,7 +144740,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
|
|
|
144336
144740
|
|
|
144337
144741
|
if (lower_is_scalar) {
|
|
144338
144742
|
auto scalar = comparison.lower.get();
|
|
144339
|
-
|
|
144743
|
+
Value constant_value;
|
|
144744
|
+
if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
|
|
144745
|
+
return FilterResult::UNSUPPORTED;
|
|
144746
|
+
}
|
|
144340
144747
|
|
|
144341
144748
|
// create the ExpressionValueInformation
|
|
144342
144749
|
ExpressionValueInformation info;
|
|
@@ -144369,7 +144776,10 @@ FilterResult FilterCombiner::AddFilter(Expression *expr) {
|
|
|
144369
144776
|
|
|
144370
144777
|
if (upper_is_scalar) {
|
|
144371
144778
|
auto scalar = comparison.upper.get();
|
|
144372
|
-
|
|
144779
|
+
Value constant_value;
|
|
144780
|
+
if (!ExpressionExecutor::TryEvaluateScalar(*scalar, constant_value)) {
|
|
144781
|
+
return FilterResult::UNSUPPORTED;
|
|
144782
|
+
}
|
|
144373
144783
|
|
|
144374
144784
|
// create the ExpressionValueInformation
|
|
144375
144785
|
ExpressionValueInformation info;
|
|
@@ -145281,7 +145691,6 @@ unique_ptr<Expression> InClauseRewriter::VisitReplace(BoundOperatorExpression &e
|
|
|
145281
145691
|
// IN clause with many children: try to generate a mark join that replaces this IN expression
|
|
145282
145692
|
// we can only do this if the expressions in the expression list are scalar
|
|
145283
145693
|
for (idx_t i = 1; i < expr.children.size(); i++) {
|
|
145284
|
-
D_ASSERT(expr.children[i]->return_type == in_type);
|
|
145285
145694
|
if (!expr.children[i]->IsFoldable()) {
|
|
145286
145695
|
// non-scalar expression
|
|
145287
145696
|
all_scalar = false;
|
|
@@ -147720,21 +148129,35 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownAggregate(unique_ptr<Logical
|
|
|
147720
148129
|
FilterPushdown child_pushdown(optimizer);
|
|
147721
148130
|
for (idx_t i = 0; i < filters.size(); i++) {
|
|
147722
148131
|
auto &f = *filters[i];
|
|
147723
|
-
|
|
147724
|
-
|
|
147725
|
-
|
|
147726
|
-
|
|
147727
|
-
|
|
147728
|
-
|
|
147729
|
-
|
|
147730
|
-
|
|
147731
|
-
|
|
147732
|
-
|
|
148132
|
+
if (f.bindings.find(aggr.aggregate_index) != f.bindings.end()) {
|
|
148133
|
+
// filter on aggregate: cannot pushdown
|
|
148134
|
+
continue;
|
|
148135
|
+
}
|
|
148136
|
+
if (f.bindings.find(aggr.groupings_index) != f.bindings.end()) {
|
|
148137
|
+
// filter on GROUPINGS function: cannot pushdown
|
|
148138
|
+
continue;
|
|
148139
|
+
}
|
|
148140
|
+
// if there are any empty grouping sets, we cannot push down filters
|
|
148141
|
+
bool has_empty_grouping_sets = false;
|
|
148142
|
+
for (auto &grp : aggr.grouping_sets) {
|
|
148143
|
+
if (grp.empty()) {
|
|
148144
|
+
has_empty_grouping_sets = true;
|
|
147733
148145
|
}
|
|
147734
|
-
// erase the filter from here
|
|
147735
|
-
filters.erase(filters.begin() + i);
|
|
147736
|
-
i--;
|
|
147737
148146
|
}
|
|
148147
|
+
if (has_empty_grouping_sets) {
|
|
148148
|
+
continue;
|
|
148149
|
+
}
|
|
148150
|
+
// no aggregate! we can push this down
|
|
148151
|
+
// rewrite any group bindings within the filter
|
|
148152
|
+
f.filter = ReplaceGroupBindings(aggr, move(f.filter));
|
|
148153
|
+
// add the filter to the child node
|
|
148154
|
+
if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
|
|
148155
|
+
// filter statically evaluates to false, strip tree
|
|
148156
|
+
return make_unique<LogicalEmptyResult>(move(op));
|
|
148157
|
+
}
|
|
148158
|
+
// erase the filter from here
|
|
148159
|
+
filters.erase(filters.begin() + i);
|
|
148160
|
+
i--;
|
|
147738
148161
|
}
|
|
147739
148162
|
child_pushdown.GenerateFilters();
|
|
147740
148163
|
|
|
@@ -152440,6 +152863,19 @@ unique_ptr<LogicalOperator> TopN::Optimize(unique_ptr<LogicalOperator> op) {
|
|
|
152440
152863
|
} // namespace duckdb
|
|
152441
152864
|
|
|
152442
152865
|
|
|
152866
|
+
namespace duckdb {
|
|
152867
|
+
|
|
152868
|
+
BasePipelineEvent::BasePipelineEvent(shared_ptr<Pipeline> pipeline_p)
|
|
152869
|
+
: Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
|
|
152870
|
+
}
|
|
152871
|
+
|
|
152872
|
+
BasePipelineEvent::BasePipelineEvent(Pipeline &pipeline_p)
|
|
152873
|
+
: Event(pipeline_p.executor), pipeline(pipeline_p.shared_from_this()) {
|
|
152874
|
+
}
|
|
152875
|
+
|
|
152876
|
+
} // namespace duckdb
|
|
152877
|
+
|
|
152878
|
+
|
|
152443
152879
|
|
|
152444
152880
|
|
|
152445
152881
|
|
|
@@ -152559,16 +152995,13 @@ public:
|
|
|
152559
152995
|
|
|
152560
152996
|
|
|
152561
152997
|
|
|
152562
|
-
|
|
152563
152998
|
namespace duckdb {
|
|
152564
152999
|
|
|
152565
|
-
|
|
153000
|
+
//! A PipelineEvent is responsible for scheduling a pipeline
|
|
153001
|
+
class PipelineEvent : public BasePipelineEvent {
|
|
152566
153002
|
public:
|
|
152567
153003
|
PipelineEvent(shared_ptr<Pipeline> pipeline);
|
|
152568
153004
|
|
|
152569
|
-
//! The pipeline that this event belongs to
|
|
152570
|
-
shared_ptr<Pipeline> pipeline;
|
|
152571
|
-
|
|
152572
153005
|
public:
|
|
152573
153006
|
void Schedule() override;
|
|
152574
153007
|
void FinishEvent() override;
|
|
@@ -152696,17 +153129,13 @@ private:
|
|
|
152696
153129
|
|
|
152697
153130
|
|
|
152698
153131
|
|
|
152699
|
-
|
|
152700
153132
|
namespace duckdb {
|
|
152701
153133
|
class Executor;
|
|
152702
153134
|
|
|
152703
|
-
class PipelineFinishEvent : public
|
|
153135
|
+
class PipelineFinishEvent : public BasePipelineEvent {
|
|
152704
153136
|
public:
|
|
152705
153137
|
PipelineFinishEvent(shared_ptr<Pipeline> pipeline);
|
|
152706
153138
|
|
|
152707
|
-
//! The pipeline that this event belongs to
|
|
152708
|
-
shared_ptr<Pipeline> pipeline;
|
|
152709
|
-
|
|
152710
153139
|
public:
|
|
152711
153140
|
void Schedule() override;
|
|
152712
153141
|
void FinishEvent() override;
|
|
@@ -152733,6 +153162,9 @@ Executor &Executor::Get(ClientContext &context) {
|
|
|
152733
153162
|
|
|
152734
153163
|
void Executor::AddEvent(shared_ptr<Event> event) {
|
|
152735
153164
|
lock_guard<mutex> elock(executor_lock);
|
|
153165
|
+
if (cancelled) {
|
|
153166
|
+
return;
|
|
153167
|
+
}
|
|
152736
153168
|
events.push_back(move(event));
|
|
152737
153169
|
}
|
|
152738
153170
|
|
|
@@ -153036,6 +153468,7 @@ void Executor::CancelTasks() {
|
|
|
153036
153468
|
vector<weak_ptr<Pipeline>> weak_references;
|
|
153037
153469
|
{
|
|
153038
153470
|
lock_guard<mutex> elock(executor_lock);
|
|
153471
|
+
cancelled = true;
|
|
153039
153472
|
weak_references.reserve(pipelines.size());
|
|
153040
153473
|
for (auto &pipeline : pipelines) {
|
|
153041
153474
|
weak_references.push_back(weak_ptr<Pipeline>(pipeline));
|
|
@@ -153112,10 +153545,10 @@ PendingExecutionResult Executor::ExecuteTask() {
|
|
|
153112
153545
|
lock_guard<mutex> elock(executor_lock);
|
|
153113
153546
|
pipelines.clear();
|
|
153114
153547
|
NextExecutor();
|
|
153115
|
-
if (
|
|
153548
|
+
if (HasError()) { // LCOV_EXCL_START
|
|
153116
153549
|
// an exception has occurred executing one of the pipelines
|
|
153117
153550
|
execution_result = PendingExecutionResult::EXECUTION_ERROR;
|
|
153118
|
-
|
|
153551
|
+
ThrowException();
|
|
153119
153552
|
} // LCOV_EXCL_STOP
|
|
153120
153553
|
execution_result = PendingExecutionResult::RESULT_READY;
|
|
153121
153554
|
return execution_result;
|
|
@@ -153124,6 +153557,7 @@ PendingExecutionResult Executor::ExecuteTask() {
|
|
|
153124
153557
|
void Executor::Reset() {
|
|
153125
153558
|
lock_guard<mutex> elock(executor_lock);
|
|
153126
153559
|
physical_plan = nullptr;
|
|
153560
|
+
cancelled = false;
|
|
153127
153561
|
owned_plan.reset();
|
|
153128
153562
|
root_executor.reset();
|
|
153129
153563
|
root_pipelines.clear();
|
|
@@ -153160,7 +153594,7 @@ vector<LogicalType> Executor::GetTypes() {
|
|
|
153160
153594
|
}
|
|
153161
153595
|
|
|
153162
153596
|
void Executor::PushError(PreservedError exception) {
|
|
153163
|
-
lock_guard<mutex> elock(
|
|
153597
|
+
lock_guard<mutex> elock(error_lock);
|
|
153164
153598
|
// interrupt execution of any other pipelines that belong to this executor
|
|
153165
153599
|
context.interrupted = true;
|
|
153166
153600
|
// push the exception onto the stack
|
|
@@ -153168,20 +153602,16 @@ void Executor::PushError(PreservedError exception) {
|
|
|
153168
153602
|
}
|
|
153169
153603
|
|
|
153170
153604
|
bool Executor::HasError() {
|
|
153171
|
-
lock_guard<mutex> elock(
|
|
153605
|
+
lock_guard<mutex> elock(error_lock);
|
|
153172
153606
|
return !exceptions.empty();
|
|
153173
153607
|
}
|
|
153174
153608
|
|
|
153175
153609
|
void Executor::ThrowException() {
|
|
153176
|
-
lock_guard<mutex> elock(
|
|
153177
|
-
ThrowExceptionInternal();
|
|
153178
|
-
}
|
|
153179
|
-
|
|
153180
|
-
void Executor::ThrowExceptionInternal() { // LCOV_EXCL_START
|
|
153610
|
+
lock_guard<mutex> elock(error_lock);
|
|
153181
153611
|
D_ASSERT(!exceptions.empty());
|
|
153182
153612
|
auto &entry = exceptions[0];
|
|
153183
153613
|
entry.Throw();
|
|
153184
|
-
}
|
|
153614
|
+
}
|
|
153185
153615
|
|
|
153186
153616
|
void Executor::Flush(ThreadContext &tcontext) {
|
|
153187
153617
|
profiler->Flush(tcontext.profiler);
|
|
@@ -153446,6 +153876,9 @@ void Pipeline::Ready() {
|
|
|
153446
153876
|
}
|
|
153447
153877
|
|
|
153448
153878
|
void Pipeline::Finalize(Event &event) {
|
|
153879
|
+
if (executor.HasError()) {
|
|
153880
|
+
return;
|
|
153881
|
+
}
|
|
153449
153882
|
D_ASSERT(ready);
|
|
153450
153883
|
try {
|
|
153451
153884
|
auto sink_state = sink->Finalize(*this, event, executor.context, *sink->sink_state);
|
|
@@ -153556,16 +153989,25 @@ void PipelineCompleteEvent::FinalizeFinish() {
|
|
|
153556
153989
|
} // namespace duckdb
|
|
153557
153990
|
|
|
153558
153991
|
|
|
153992
|
+
|
|
153559
153993
|
namespace duckdb {
|
|
153560
153994
|
|
|
153561
|
-
PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p)
|
|
153562
|
-
: Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
|
|
153995
|
+
PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
|
|
153563
153996
|
}
|
|
153564
153997
|
|
|
153565
153998
|
void PipelineEvent::Schedule() {
|
|
153566
153999
|
auto event = shared_from_this();
|
|
153567
|
-
pipeline->
|
|
153568
|
-
|
|
154000
|
+
auto &executor = pipeline->executor;
|
|
154001
|
+
try {
|
|
154002
|
+
pipeline->Schedule(event);
|
|
154003
|
+
D_ASSERT(total_tasks > 0);
|
|
154004
|
+
} catch (Exception &ex) {
|
|
154005
|
+
executor.PushError(PreservedError(ex));
|
|
154006
|
+
} catch (std::exception &ex) {
|
|
154007
|
+
executor.PushError(PreservedError(ex));
|
|
154008
|
+
} catch (...) { // LCOV_EXCL_START
|
|
154009
|
+
executor.PushError(PreservedError("Unknown exception in Finalize!"));
|
|
154010
|
+
} // LCOV_EXCL_STOP
|
|
153569
154011
|
}
|
|
153570
154012
|
|
|
153571
154013
|
void PipelineEvent::FinishEvent() {
|
|
@@ -153948,8 +154390,7 @@ void PipelineExecutor::EndOperator(PhysicalOperator *op, DataChunk *chunk) {
|
|
|
153948
154390
|
|
|
153949
154391
|
namespace duckdb {
|
|
153950
154392
|
|
|
153951
|
-
PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p)
|
|
153952
|
-
: Event(pipeline_p->executor), pipeline(move(pipeline_p)) {
|
|
154393
|
+
PipelineFinishEvent::PipelineFinishEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
|
|
153953
154394
|
}
|
|
153954
154395
|
|
|
153955
154396
|
void PipelineFinishEvent::Schedule() {
|
|
@@ -167484,7 +167925,7 @@ string QueryNode::ResultModifiersToString() const {
|
|
|
167484
167925
|
} else if (modifier.type == ResultModifierType::LIMIT_PERCENT_MODIFIER) {
|
|
167485
167926
|
auto &limit_p_modifier = (LimitPercentModifier &)modifier;
|
|
167486
167927
|
if (limit_p_modifier.limit) {
|
|
167487
|
-
result += " LIMIT " + limit_p_modifier.limit->ToString() + " %";
|
|
167928
|
+
result += " LIMIT (" + limit_p_modifier.limit->ToString() + ") %";
|
|
167488
167929
|
}
|
|
167489
167930
|
if (limit_p_modifier.offset) {
|
|
167490
167931
|
result += " OFFSET " + limit_p_modifier.offset->ToString();
|
|
@@ -171360,7 +171801,7 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause *de_with_clause,
|
|
|
171360
171801
|
}
|
|
171361
171802
|
// we need a query
|
|
171362
171803
|
if (!cte->ctequery || cte->ctequery->type != duckdb_libpgquery::T_PGSelectStmt) {
|
|
171363
|
-
throw
|
|
171804
|
+
throw NotImplementedException("A CTE needs a SELECT");
|
|
171364
171805
|
}
|
|
171365
171806
|
|
|
171366
171807
|
// CTE transformation can either result in inlining for non recursive CTEs, or in recursive CTE bindings
|
|
@@ -174956,6 +175397,8 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
|
|
|
174956
175397
|
// we didn't bind columns, try again in children
|
|
174957
175398
|
return BindResult(error);
|
|
174958
175399
|
}
|
|
175400
|
+
} else if (depth > 0 && !aggregate_binder.HasBoundColumns()) {
|
|
175401
|
+
return BindResult("Aggregate with only constant parameters has to be bound in the root subquery");
|
|
174959
175402
|
}
|
|
174960
175403
|
if (!filter_error.empty()) {
|
|
174961
175404
|
return BindResult(filter_error);
|
|
@@ -174963,8 +175406,9 @@ BindResult SelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFuncti
|
|
|
174963
175406
|
|
|
174964
175407
|
if (aggr.filter) {
|
|
174965
175408
|
auto &child = (BoundExpression &)*aggr.filter;
|
|
174966
|
-
bound_filter = move(child.expr);
|
|
175409
|
+
bound_filter = BoundCastExpression::AddCastToType(move(child.expr), LogicalType::BOOLEAN);
|
|
174967
175410
|
}
|
|
175411
|
+
|
|
174968
175412
|
// all children bound successfully
|
|
174969
175413
|
// extract the children and types
|
|
174970
175414
|
vector<LogicalType> types;
|
|
@@ -176117,7 +176561,7 @@ BindResult ExpressionBinder::BindMacro(FunctionExpression &function, ScalarMacro
|
|
|
176117
176561
|
string error =
|
|
176118
176562
|
MacroFunction::ValidateArguments(*macro_func->function, macro_func->name, function, positionals, defaults);
|
|
176119
176563
|
if (!error.empty()) {
|
|
176120
|
-
|
|
176564
|
+
throw BinderException(binder.FormatError(*expr->get(), error));
|
|
176121
176565
|
}
|
|
176122
176566
|
|
|
176123
176567
|
// create a MacroBinding to bind this macro's parameters to its arguments
|
|
@@ -177140,10 +177584,13 @@ public:
|
|
|
177140
177584
|
public:
|
|
177141
177585
|
unique_ptr<Expression> Bind(unique_ptr<ParsedExpression> expr);
|
|
177142
177586
|
|
|
177143
|
-
idx_t MaxCount() {
|
|
177587
|
+
idx_t MaxCount() const {
|
|
177144
177588
|
return max_count;
|
|
177145
177589
|
}
|
|
177146
177590
|
|
|
177591
|
+
bool HasExtraList() const {
|
|
177592
|
+
return extra_list;
|
|
177593
|
+
}
|
|
177147
177594
|
unique_ptr<Expression> CreateExtraReference(unique_ptr<ParsedExpression> expr);
|
|
177148
177595
|
|
|
177149
177596
|
private:
|
|
@@ -177185,6 +177632,9 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
|
|
|
177185
177632
|
Value &delimiter_value) {
|
|
177186
177633
|
auto new_binder = Binder::CreateBinder(context, this, true);
|
|
177187
177634
|
if (delimiter->HasSubquery()) {
|
|
177635
|
+
if (!order_binder.HasExtraList()) {
|
|
177636
|
+
throw BinderException("Subquery in LIMIT/OFFSET not supported in set operation");
|
|
177637
|
+
}
|
|
177188
177638
|
return order_binder.CreateExtraReference(move(delimiter));
|
|
177189
177639
|
}
|
|
177190
177640
|
ExpressionBinder expr_binder(*new_binder, context);
|
|
@@ -177195,6 +177645,8 @@ unique_ptr<Expression> Binder::BindDelimiter(ClientContext &context, OrderBinder
|
|
|
177195
177645
|
delimiter_value = ExpressionExecutor::EvaluateScalar(*expr).CastAs(type);
|
|
177196
177646
|
return nullptr;
|
|
177197
177647
|
}
|
|
177648
|
+
// move any correlated columns to this binder
|
|
177649
|
+
MoveCorrelatedExpressions(*new_binder);
|
|
177198
177650
|
return expr;
|
|
177199
177651
|
}
|
|
177200
177652
|
|
|
@@ -179798,11 +180250,13 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
|
|
|
179798
180250
|
BindDefaultValues(base.columns, result->bound_defaults);
|
|
179799
180251
|
}
|
|
179800
180252
|
|
|
180253
|
+
idx_t regular_column_count = 0;
|
|
179801
180254
|
// bind collations to detect any unsupported collation errors
|
|
179802
180255
|
for (auto &column : base.columns) {
|
|
179803
180256
|
if (column.Generated()) {
|
|
179804
180257
|
continue;
|
|
179805
180258
|
}
|
|
180259
|
+
regular_column_count++;
|
|
179806
180260
|
if (column.Type().id() == LogicalTypeId::VARCHAR) {
|
|
179807
180261
|
ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
|
|
179808
180262
|
}
|
|
@@ -179814,6 +180268,9 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
|
|
|
179814
180268
|
result->dependencies.insert(type_dependency);
|
|
179815
180269
|
}
|
|
179816
180270
|
}
|
|
180271
|
+
if (regular_column_count == 0) {
|
|
180272
|
+
throw BinderException("Creating a table without physical (non-generated) columns is not supported");
|
|
180273
|
+
}
|
|
179817
180274
|
properties.allow_stream_result = false;
|
|
179818
180275
|
return result;
|
|
179819
180276
|
}
|
|
@@ -180241,6 +180698,13 @@ BoundStatement Binder::Bind(ExportStatement &stmt) {
|
|
|
180241
180698
|
info->schema = table->schema->name;
|
|
180242
180699
|
info->table = table->name;
|
|
180243
180700
|
|
|
180701
|
+
// We can not export generated columns
|
|
180702
|
+
for (auto &col : table->columns) {
|
|
180703
|
+
if (!col.Generated()) {
|
|
180704
|
+
info->select_list.push_back(col.GetName());
|
|
180705
|
+
}
|
|
180706
|
+
}
|
|
180707
|
+
|
|
180244
180708
|
exported_data.table_name = info->table;
|
|
180245
180709
|
exported_data.schema_name = info->schema;
|
|
180246
180710
|
exported_data.file_path = info->file_path;
|
|
@@ -180486,7 +180950,10 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
|
|
|
180486
180950
|
}
|
|
180487
180951
|
|
|
180488
180952
|
// parse select statement and add to logical plan
|
|
180489
|
-
auto
|
|
180953
|
+
auto select_binder = Binder::CreateBinder(context, this);
|
|
180954
|
+
auto root_select = select_binder->Bind(*stmt.select_statement);
|
|
180955
|
+
MoveCorrelatedExpressions(*select_binder);
|
|
180956
|
+
|
|
180490
180957
|
CheckInsertColumnCountMismatch(expected_columns, root_select.types.size(), !stmt.columns.empty(),
|
|
180491
180958
|
table->name.c_str());
|
|
180492
180959
|
|
|
@@ -181768,6 +182235,18 @@ string Binder::RetrieveUsingBinding(Binder ¤t_binder, UsingColumnSet *curr
|
|
|
181768
182235
|
return binding;
|
|
181769
182236
|
}
|
|
181770
182237
|
|
|
182238
|
+
static vector<string> RemoveDuplicateUsingColumns(const vector<string> &using_columns) {
|
|
182239
|
+
vector<string> result;
|
|
182240
|
+
case_insensitive_set_t handled_columns;
|
|
182241
|
+
for (auto &using_column : using_columns) {
|
|
182242
|
+
if (handled_columns.find(using_column) == handled_columns.end()) {
|
|
182243
|
+
handled_columns.insert(using_column);
|
|
182244
|
+
result.push_back(using_column);
|
|
182245
|
+
}
|
|
182246
|
+
}
|
|
182247
|
+
return result;
|
|
182248
|
+
}
|
|
182249
|
+
|
|
181771
182250
|
unique_ptr<BoundTableRef> Binder::Bind(JoinRef &ref) {
|
|
181772
182251
|
auto result = make_unique<BoundJoinRef>();
|
|
181773
182252
|
result->left_binder = Binder::CreateBinder(context, this);
|
|
@@ -181837,6 +182316,8 @@ unique_ptr<BoundTableRef> Binder::Bind(JoinRef &ref) {
|
|
|
181837
182316
|
D_ASSERT(!result->condition);
|
|
181838
182317
|
extra_using_columns = ref.using_columns;
|
|
181839
182318
|
}
|
|
182319
|
+
extra_using_columns = RemoveDuplicateUsingColumns(extra_using_columns);
|
|
182320
|
+
|
|
181840
182321
|
if (!extra_using_columns.empty()) {
|
|
181841
182322
|
vector<UsingColumnSet *> left_using_bindings;
|
|
181842
182323
|
vector<UsingColumnSet *> right_using_bindings;
|
|
@@ -182282,7 +182763,7 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundEmptyTableRef &ref) {
|
|
|
182282
182763
|
namespace duckdb {
|
|
182283
182764
|
|
|
182284
182765
|
unique_ptr<LogicalOperator> Binder::CreatePlan(BoundExpressionListRef &ref) {
|
|
182285
|
-
auto root = make_unique_base<LogicalOperator, LogicalDummyScan>(
|
|
182766
|
+
auto root = make_unique_base<LogicalOperator, LogicalDummyScan>(GenerateTableIndex());
|
|
182286
182767
|
// values list, first plan any subqueries in the list
|
|
182287
182768
|
for (auto &expr_list : ref.values) {
|
|
182288
182769
|
for (auto &expr : expr_list) {
|
|
@@ -184835,7 +185316,7 @@ BindResult ConstantBinder::BindExpression(unique_ptr<ParsedExpression> *expr_ptr
|
|
|
184835
185316
|
case ExpressionClass::COLUMN_REF:
|
|
184836
185317
|
return BindResult(clause + " cannot contain column names");
|
|
184837
185318
|
case ExpressionClass::SUBQUERY:
|
|
184838
|
-
|
|
185319
|
+
throw BinderException(clause + " cannot contain subqueries");
|
|
184839
185320
|
case ExpressionClass::DEFAULT:
|
|
184840
185321
|
return BindResult(clause + " cannot contain DEFAULT clause");
|
|
184841
185322
|
case ExpressionClass::WINDOW:
|
|
@@ -185095,6 +185576,9 @@ unique_ptr<Expression> OrderBinder::CreateProjectionReference(ParsedExpression &
|
|
|
185095
185576
|
}
|
|
185096
185577
|
|
|
185097
185578
|
unique_ptr<Expression> OrderBinder::CreateExtraReference(unique_ptr<ParsedExpression> expr) {
|
|
185579
|
+
if (!extra_list) {
|
|
185580
|
+
throw InternalException("CreateExtraReference called without extra_list");
|
|
185581
|
+
}
|
|
185098
185582
|
auto result = CreateProjectionReference(*expr, extra_list->size());
|
|
185099
185583
|
extra_list->push_back(move(expr));
|
|
185100
185584
|
return result;
|
|
@@ -189221,6 +189705,9 @@ unique_ptr<LogicalOperator> FlattenDependentJoins::PushDownDependentJoinInternal
|
|
|
189221
189705
|
case LogicalOperatorType::LOGICAL_ORDER_BY:
|
|
189222
189706
|
plan->children[0] = PushDownDependentJoin(move(plan->children[0]));
|
|
189223
189707
|
return plan;
|
|
189708
|
+
case LogicalOperatorType::LOGICAL_RECURSIVE_CTE: {
|
|
189709
|
+
throw ParserException("Recursive CTEs not supported in correlated subquery");
|
|
189710
|
+
}
|
|
189224
189711
|
default:
|
|
189225
189712
|
throw InternalException("Logical operator type \"%s\" for dependent join", LogicalOperatorToString(plan->type));
|
|
189226
189713
|
}
|
|
@@ -191347,7 +191834,7 @@ void CheckpointManager::CreateCheckpoint() {
|
|
|
191347
191834
|
wal->Flush();
|
|
191348
191835
|
|
|
191349
191836
|
if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_BEFORE_HEADER) {
|
|
191350
|
-
throw
|
|
191837
|
+
throw FatalException("Checkpoint aborted before header write because of PRAGMA checkpoint_abort flag");
|
|
191351
191838
|
}
|
|
191352
191839
|
|
|
191353
191840
|
// finally write the updated header
|
|
@@ -191356,7 +191843,7 @@ void CheckpointManager::CreateCheckpoint() {
|
|
|
191356
191843
|
block_manager.WriteHeader(header);
|
|
191357
191844
|
|
|
191358
191845
|
if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_BEFORE_TRUNCATE) {
|
|
191359
|
-
throw
|
|
191846
|
+
throw FatalException("Checkpoint aborted before truncate because of PRAGMA checkpoint_abort flag");
|
|
191360
191847
|
}
|
|
191361
191848
|
|
|
191362
191849
|
// truncate the WAL
|
|
@@ -196907,7 +197394,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
|
|
|
196907
197394
|
}
|
|
196908
197395
|
|
|
196909
197396
|
// Alter column to add new constraint
|
|
196910
|
-
DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<
|
|
197397
|
+
DataTable::DataTable(ClientContext &context, DataTable &parent, unique_ptr<BoundConstraint> constraint)
|
|
196911
197398
|
: info(parent.info), db(parent.db), total_rows(parent.total_rows.load()), row_groups(parent.row_groups),
|
|
196912
197399
|
is_root(true) {
|
|
196913
197400
|
|
|
@@ -197082,7 +197569,7 @@ void DataTable::InitializeParallelScan(ClientContext &context, ParallelTableScan
|
|
|
197082
197569
|
|
|
197083
197570
|
bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState &state, TableScanState &scan_state,
|
|
197084
197571
|
const vector<column_t> &column_ids) {
|
|
197085
|
-
while (state.current_row_group) {
|
|
197572
|
+
while (state.current_row_group && state.current_row_group->count > 0) {
|
|
197086
197573
|
idx_t vector_index;
|
|
197087
197574
|
idx_t max_row;
|
|
197088
197575
|
if (ClientConfig::GetConfig(context).verify_parallelism) {
|
|
@@ -197096,13 +197583,8 @@ bool DataTable::NextParallelScan(ClientContext &context, ParallelTableScanState
|
|
|
197096
197583
|
max_row = state.current_row_group->start + state.current_row_group->count;
|
|
197097
197584
|
}
|
|
197098
197585
|
max_row = MinValue<idx_t>(max_row, state.max_row);
|
|
197099
|
-
bool need_to_scan
|
|
197100
|
-
|
|
197101
|
-
need_to_scan = false;
|
|
197102
|
-
} else {
|
|
197103
|
-
need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
|
|
197104
|
-
state.current_row_group, vector_index, max_row);
|
|
197105
|
-
}
|
|
197586
|
+
bool need_to_scan = InitializeScanInRowGroup(scan_state, column_ids, scan_state.table_filters,
|
|
197587
|
+
state.current_row_group, vector_index, max_row);
|
|
197106
197588
|
if (ClientConfig::GetConfig(context).verify_parallelism) {
|
|
197107
197589
|
state.vector_index++;
|
|
197108
197590
|
if (state.vector_index * STANDARD_VECTOR_SIZE >= state.current_row_group->count) {
|
|
@@ -197361,14 +197843,15 @@ static void VerifyDeleteForeignKeyConstraint(const BoundForeignKeyConstraint &bf
|
|
|
197361
197843
|
VerifyForeignKeyConstraint(bfk, context, chunk, false);
|
|
197362
197844
|
}
|
|
197363
197845
|
|
|
197364
|
-
void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const
|
|
197846
|
+
void DataTable::VerifyNewConstraint(ClientContext &context, DataTable &parent, const BoundConstraint *constraint) {
|
|
197365
197847
|
if (constraint->type != ConstraintType::NOT_NULL) {
|
|
197366
197848
|
throw NotImplementedException("FIXME: ALTER COLUMN with such constraint is not supported yet");
|
|
197367
197849
|
}
|
|
197368
197850
|
// scan the original table, check if there's any null value
|
|
197369
|
-
auto ¬_null_constraint = (
|
|
197851
|
+
auto ¬_null_constraint = (BoundNotNullConstraint &)*constraint;
|
|
197370
197852
|
auto &transaction = Transaction::GetTransaction(context);
|
|
197371
197853
|
vector<LogicalType> scan_types;
|
|
197854
|
+
D_ASSERT(not_null_constraint.index < parent.column_definitions.size());
|
|
197372
197855
|
scan_types.push_back(parent.column_definitions[not_null_constraint.index].Type());
|
|
197373
197856
|
DataChunk scan_chunk;
|
|
197374
197857
|
auto &allocator = Allocator::Get(context);
|
|
@@ -198125,6 +198608,9 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
|
|
|
198125
198608
|
return nullptr;
|
|
198126
198609
|
}
|
|
198127
198610
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
198611
|
+
if (column_id >= column_stats.size()) {
|
|
198612
|
+
throw InternalException("Call to GetStatistics is out of range");
|
|
198613
|
+
}
|
|
198128
198614
|
return column_stats[column_id]->stats->Copy();
|
|
198129
198615
|
}
|
|
198130
198616
|
|
|
@@ -199413,7 +199899,7 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) {
|
|
|
199413
199899
|
|
|
199414
199900
|
auto &config = DBConfig::GetConfig(db);
|
|
199415
199901
|
if (config.options.checkpoint_abort == CheckpointAbort::DEBUG_ABORT_AFTER_FREE_LIST_WRITE) {
|
|
199416
|
-
throw
|
|
199902
|
+
throw FatalException("Checkpoint aborted after free list write because of PRAGMA checkpoint_abort flag");
|
|
199417
199903
|
}
|
|
199418
199904
|
|
|
199419
199905
|
if (!use_direct_io) {
|
|
@@ -200939,6 +201425,7 @@ idx_t ChunkVectorInfo::Delete(Transaction &transaction, row_t rows[], idx_t coun
|
|
|
200939
201425
|
}
|
|
200940
201426
|
// after verifying that there are no conflicts we mark the tuple as deleted
|
|
200941
201427
|
deleted[rows[i]] = transaction.transaction_id;
|
|
201428
|
+
rows[deleted_tuples] = rows[i];
|
|
200942
201429
|
deleted_tuples++;
|
|
200943
201430
|
}
|
|
200944
201431
|
return deleted_tuples;
|
|
@@ -201266,6 +201753,8 @@ public:
|
|
|
201266
201753
|
idx_t ScanCommitted(idx_t vector_index, ColumnScanState &state, Vector &result, bool allow_updates) override;
|
|
201267
201754
|
idx_t ScanCount(ColumnScanState &state, Vector &result, idx_t count) override;
|
|
201268
201755
|
|
|
201756
|
+
void Skip(ColumnScanState &state, idx_t count = STANDARD_VECTOR_SIZE) override;
|
|
201757
|
+
|
|
201269
201758
|
void InitializeAppend(ColumnAppendState &state) override;
|
|
201270
201759
|
void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count) override;
|
|
201271
201760
|
void RevertAppend(row_t start_row) override;
|
|
@@ -203845,9 +204334,15 @@ void VersionDeleteState::Flush() {
|
|
|
203845
204334
|
return;
|
|
203846
204335
|
}
|
|
203847
204336
|
// delete in the current info
|
|
203848
|
-
|
|
203849
|
-
//
|
|
203850
|
-
|
|
204337
|
+
// it is possible for delete statements to delete the same tuple multiple times when combined with a USING clause
|
|
204338
|
+
// in the current_info->Delete, we check which tuples are actually deleted (excluding duplicate deletions)
|
|
204339
|
+
// this is returned in the actual_delete_count
|
|
204340
|
+
auto actual_delete_count = current_info->Delete(transaction, rows, count);
|
|
204341
|
+
delete_count += actual_delete_count;
|
|
204342
|
+
if (actual_delete_count > 0) {
|
|
204343
|
+
// now push the delete into the undo buffer, but only if any deletes were actually performed
|
|
204344
|
+
transaction.PushDelete(table, current_info, rows, actual_delete_count, base_row + chunk_row);
|
|
204345
|
+
}
|
|
203851
204346
|
count = 0;
|
|
203852
204347
|
}
|
|
203853
204348
|
|
|
@@ -204224,6 +204719,15 @@ idx_t StructColumnData::ScanCount(ColumnScanState &state, Vector &result, idx_t
|
|
|
204224
204719
|
return scan_count;
|
|
204225
204720
|
}
|
|
204226
204721
|
|
|
204722
|
+
void StructColumnData::Skip(ColumnScanState &state, idx_t count) {
|
|
204723
|
+
validity.Skip(state.child_states[0], count);
|
|
204724
|
+
|
|
204725
|
+
// skip inside the sub-columns
|
|
204726
|
+
for (idx_t child_idx = 0; child_idx < sub_columns.size(); child_idx++) {
|
|
204727
|
+
sub_columns[child_idx]->Skip(state.child_states[child_idx + 1], count);
|
|
204728
|
+
}
|
|
204729
|
+
}
|
|
204730
|
+
|
|
204227
204731
|
void StructColumnData::InitializeAppend(ColumnAppendState &state) {
|
|
204228
204732
|
ColumnAppendState validity_append;
|
|
204229
204733
|
validity.InitializeAppend(validity_append);
|
|
@@ -206683,6 +207187,7 @@ void CleanupState::CleanupUpdate(UpdateInfo *info) {
|
|
|
206683
207187
|
|
|
206684
207188
|
void CleanupState::CleanupDelete(DeleteInfo *info) {
|
|
206685
207189
|
auto version_table = info->table;
|
|
207190
|
+
D_ASSERT(version_table->info->cardinality >= info->count);
|
|
206686
207191
|
version_table->info->cardinality -= info->count;
|
|
206687
207192
|
if (version_table->info->indexes.Empty()) {
|
|
206688
207193
|
// this table has no indexes: no cleanup to be done
|
|
@@ -260108,49 +260613,84 @@ static void AssignInvalidUTF8Reason(UnicodeInvalidReason *invalid_reason, size_t
|
|
|
260108
260613
|
}
|
|
260109
260614
|
}
|
|
260110
260615
|
|
|
260111
|
-
|
|
260112
|
-
|
|
260113
|
-
|
|
260114
|
-
|
|
260115
|
-
|
|
260116
|
-
|
|
260117
|
-
|
|
260118
|
-
|
|
260119
|
-
|
|
260120
|
-
|
|
260121
|
-
|
|
260122
|
-
|
|
260123
|
-
|
|
260124
|
-
|
|
260125
|
-
if ((s[++i] & 0xC0) != 0x80) {
|
|
260126
|
-
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260127
|
-
return UnicodeType::INVALID;
|
|
260128
|
-
}
|
|
260129
|
-
if ((c & 0xE0) == 0xC0) {
|
|
260130
|
-
continue;
|
|
260131
|
-
}
|
|
260132
|
-
if ((s[++i] & 0xC0) != 0x80) {
|
|
260133
|
-
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260134
|
-
return UnicodeType::INVALID;
|
|
260135
|
-
}
|
|
260136
|
-
if ((c & 0xF0) == 0xE0) {
|
|
260137
|
-
continue;
|
|
260138
|
-
}
|
|
260139
|
-
if ((s[++i] & 0xC0) != 0x80) {
|
|
260616
|
+
template <const int nextra_bytes, const int mask>
|
|
260617
|
+
static inline UnicodeType
|
|
260618
|
+
UTF8ExtraByteLoop(const int first_pos_seq, int utf8char, size_t& i,
|
|
260619
|
+
const char *s, const size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
|
|
260620
|
+
if ((len - i) < (nextra_bytes + 1)) {
|
|
260621
|
+
/* incomplete byte sequence */
|
|
260622
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260623
|
+
return UnicodeType::INVALID;
|
|
260624
|
+
}
|
|
260625
|
+
for (size_t j = 0 ; j < nextra_bytes; j++) {
|
|
260626
|
+
int c = (int) s[++i];
|
|
260627
|
+
/* now validate the extra bytes */
|
|
260628
|
+
if ((c & 0xC0) != 0x80) {
|
|
260629
|
+
/* extra byte is not in the format 10xxxxxx */
|
|
260140
260630
|
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260141
260631
|
return UnicodeType::INVALID;
|
|
260142
260632
|
}
|
|
260143
|
-
|
|
260144
|
-
|
|
260145
|
-
|
|
260146
|
-
|
|
260633
|
+
utf8char = (utf8char << 6) | (c & 0x3F);
|
|
260634
|
+
}
|
|
260635
|
+
if ((utf8char & mask) == 0) {
|
|
260636
|
+
/* invalid UTF-8 codepoint, not shortest possible */
|
|
260637
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
|
|
260638
|
+
return UnicodeType::INVALID;
|
|
260639
|
+
}
|
|
260640
|
+
if (utf8char > 0x10FFFF) {
|
|
260641
|
+
/* value not representable by Unicode */
|
|
260642
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
|
|
260643
|
+
return UnicodeType::INVALID;
|
|
260644
|
+
}
|
|
260645
|
+
if ((utf8char & 0x1FFF800) == 0xD800) {
|
|
260646
|
+
/* Unicode characters from U+D800 to U+DFFF are surrogate characters used by UTF-16 which are invalid in UTF-8 */
|
|
260647
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
|
|
260147
260648
|
return UnicodeType::INVALID;
|
|
260148
260649
|
}
|
|
260650
|
+
return UnicodeType::UNICODE;
|
|
260651
|
+
}
|
|
260652
|
+
|
|
260653
|
+
UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
|
|
260654
|
+
UnicodeType type = UnicodeType::ASCII;
|
|
260149
260655
|
|
|
260656
|
+
for (size_t i = 0; i < len; i++) {
|
|
260657
|
+
int c = (int) s[i];
|
|
260658
|
+
|
|
260659
|
+
if ((c & 0x80) == 0) {
|
|
260660
|
+
/* 1 byte sequence */
|
|
260661
|
+
if (c == '\0') {
|
|
260662
|
+
/* NULL byte not allowed */
|
|
260663
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
|
|
260664
|
+
return UnicodeType::INVALID;
|
|
260665
|
+
}
|
|
260666
|
+
} else {
|
|
260667
|
+
int first_pos_seq = i;
|
|
260668
|
+
|
|
260669
|
+
if ((c & 0xE0) == 0xC0) {
|
|
260670
|
+
/* 2 byte sequence */
|
|
260671
|
+
int utf8char = c & 0x1F;
|
|
260672
|
+
type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
260673
|
+
} else if ((c & 0xF0) == 0xE0) {
|
|
260674
|
+
/* 3 byte sequence */
|
|
260675
|
+
int utf8char = c & 0x0F;
|
|
260676
|
+
type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
260677
|
+
} else if ((c & 0xF8) == 0xF0) {
|
|
260678
|
+
/* 4 byte sequence */
|
|
260679
|
+
int utf8char = c & 0x07;
|
|
260680
|
+
type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
260681
|
+
} else {
|
|
260682
|
+
/* invalid UTF-8 start byte */
|
|
260683
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260684
|
+
return UnicodeType::INVALID;
|
|
260685
|
+
}
|
|
260686
|
+
if (type == UnicodeType::INVALID) {
|
|
260687
|
+
return type;
|
|
260688
|
+
}
|
|
260689
|
+
}
|
|
260690
|
+
}
|
|
260150
260691
|
return type;
|
|
260151
260692
|
}
|
|
260152
260693
|
|
|
260153
|
-
|
|
260154
260694
|
char* Utf8Proc::Normalize(const char *s, size_t len) {
|
|
260155
260695
|
assert(s);
|
|
260156
260696
|
assert(Utf8Proc::Analyze(s, len) != UnicodeType::INVALID);
|
|
@@ -322230,6 +322770,8 @@ exit:
|
|
|
322230
322770
|
// See the end of this file for a list
|
|
322231
322771
|
|
|
322232
322772
|
|
|
322773
|
+
// otherwise we have different definitions for mbedtls_pk_context / mbedtls_sha256_context
|
|
322774
|
+
#define MBEDTLS_ALLOW_PRIVATE_ACCESS
|
|
322233
322775
|
|
|
322234
322776
|
|
|
322235
322777
|
|