duckdb 0.5.2-dev809.0 → 0.5.2-dev833.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +370 -79
- package/src/duckdb.hpp +35 -13
- package/src/parquet-amalgamation.cpp +37555 -37555
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -29106,7 +29106,7 @@ struct RowOperations {
|
|
|
29106
29106
|
namespace duckdb {
|
|
29107
29107
|
|
|
29108
29108
|
template <class OP, class RETURN_TYPE, typename... ARGS>
|
|
29109
|
-
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
|
|
29109
|
+
RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
|
|
29110
29110
|
D_ASSERT(radix_bits <= sizeof(hash_t) * 8);
|
|
29111
29111
|
switch (radix_bits) {
|
|
29112
29112
|
case 1:
|
|
@@ -29135,7 +29135,7 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
|
|
|
29135
29135
|
}
|
|
29136
29136
|
|
|
29137
29137
|
template <class OP, class RETURN_TYPE, idx_t radix_bits_1, typename... ARGS>
|
|
29138
|
-
RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
|
|
29138
|
+
RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&...args) {
|
|
29139
29139
|
D_ASSERT(radix_bits_2 <= sizeof(hash_t) * 8);
|
|
29140
29140
|
switch (radix_bits_2) {
|
|
29141
29141
|
case 1:
|
|
@@ -29164,7 +29164,7 @@ RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
|
|
|
29164
29164
|
}
|
|
29165
29165
|
|
|
29166
29166
|
template <class OP, class RETURN_TYPE, typename... ARGS>
|
|
29167
|
-
RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&... args) {
|
|
29167
|
+
RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&...args) {
|
|
29168
29168
|
D_ASSERT(radix_bits_1 <= sizeof(hash_t) * 8);
|
|
29169
29169
|
switch (radix_bits_1) {
|
|
29170
29170
|
case 1:
|
|
@@ -49455,6 +49455,9 @@ void Vector::Initialize(bool zero_data, idx_t capacity) {
|
|
|
49455
49455
|
memset(data, 0, capacity * type_size);
|
|
49456
49456
|
}
|
|
49457
49457
|
}
|
|
49458
|
+
if (capacity > STANDARD_VECTOR_SIZE) {
|
|
49459
|
+
validity.Resize(STANDARD_VECTOR_SIZE, capacity);
|
|
49460
|
+
}
|
|
49458
49461
|
}
|
|
49459
49462
|
|
|
49460
49463
|
struct DataArrays {
|
|
@@ -93661,6 +93664,13 @@ unique_ptr<FunctionData> HistogramBindFunction(ClientContext &context, Aggregate
|
|
|
93661
93664
|
vector<unique_ptr<Expression>> &arguments) {
|
|
93662
93665
|
|
|
93663
93666
|
D_ASSERT(arguments.size() == 1);
|
|
93667
|
+
|
|
93668
|
+
if (arguments[0]->return_type.id() == LogicalTypeId::LIST ||
|
|
93669
|
+
arguments[0]->return_type.id() == LogicalTypeId::STRUCT ||
|
|
93670
|
+
arguments[0]->return_type.id() == LogicalTypeId::MAP) {
|
|
93671
|
+
throw NotImplementedException("Unimplemented type for histogram %s", arguments[0]->return_type.ToString());
|
|
93672
|
+
}
|
|
93673
|
+
|
|
93664
93674
|
child_list_t<LogicalType> struct_children;
|
|
93665
93675
|
struct_children.push_back({"key", LogicalType::LIST(arguments[0]->return_type)});
|
|
93666
93676
|
struct_children.push_back({"value", LogicalType::LIST(LogicalType::UBIGINT)});
|
|
@@ -95714,6 +95724,11 @@ struct VectorCastHelpers {
|
|
|
95714
95724
|
}
|
|
95715
95725
|
};
|
|
95716
95726
|
|
|
95727
|
+
struct VectorStringifiedListParser {
|
|
95728
|
+
static idx_t CountParts(const string_t &input);
|
|
95729
|
+
static bool SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start, Vector &child);
|
|
95730
|
+
};
|
|
95731
|
+
|
|
95717
95732
|
} // namespace duckdb
|
|
95718
95733
|
|
|
95719
95734
|
|
|
@@ -96484,20 +96499,8 @@ BoundCastInfo DefaultCasts::EnumCastSwitch(BindCastInput &input, const LogicalTy
|
|
|
96484
96499
|
|
|
96485
96500
|
namespace duckdb {
|
|
96486
96501
|
|
|
96487
|
-
|
|
96488
|
-
|
|
96489
|
-
}
|
|
96490
|
-
|
|
96491
|
-
BoundCastInfo child_cast_info;
|
|
96492
|
-
|
|
96493
|
-
public:
|
|
96494
|
-
unique_ptr<BoundCastData> Copy() const override {
|
|
96495
|
-
return make_unique<ListBoundCastData>(child_cast_info.Copy());
|
|
96496
|
-
}
|
|
96497
|
-
};
|
|
96498
|
-
|
|
96499
|
-
unique_ptr<BoundCastData> BindListToListCast(BindCastInput &input, const LogicalType &source,
|
|
96500
|
-
const LogicalType &target) {
|
|
96502
|
+
unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &input, const LogicalType &source,
|
|
96503
|
+
const LogicalType &target) {
|
|
96501
96504
|
vector<BoundCastInfo> child_cast_info;
|
|
96502
96505
|
auto &source_child_type = ListType::GetChildType(source);
|
|
96503
96506
|
auto &result_child_type = ListType::GetChildType(target);
|
|
@@ -96608,11 +96611,11 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
|
|
|
96608
96611
|
BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
96609
96612
|
switch (target.id()) {
|
|
96610
96613
|
case LogicalTypeId::LIST:
|
|
96611
|
-
return BoundCastInfo(ListToListCast, BindListToListCast(input, source, target));
|
|
96614
|
+
return BoundCastInfo(ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
|
|
96612
96615
|
case LogicalTypeId::VARCHAR:
|
|
96613
96616
|
case LogicalTypeId::JSON:
|
|
96614
|
-
return BoundCastInfo(ListToVarcharCast,
|
|
96615
|
-
|
|
96617
|
+
return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
|
|
96618
|
+
input, source, LogicalType::LIST(LogicalType::VARCHAR)));
|
|
96616
96619
|
default:
|
|
96617
96620
|
return DefaultCasts::TryVectorNullCast;
|
|
96618
96621
|
}
|
|
@@ -96950,9 +96953,97 @@ static BoundCastInfo VectorStringCastNumericSwitch(BindCastInput &input, const L
|
|
|
96950
96953
|
}
|
|
96951
96954
|
}
|
|
96952
96955
|
|
|
96956
|
+
bool StringListCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result, ValidityMask &result_mask,
|
|
96957
|
+
idx_t count, CastParameters &parameters, const SelectionVector *sel) {
|
|
96958
|
+
|
|
96959
|
+
idx_t total_list_size = 0;
|
|
96960
|
+
for (idx_t i = 0; i < count; i++) {
|
|
96961
|
+
idx_t idx = i;
|
|
96962
|
+
if (sel) {
|
|
96963
|
+
idx = sel->get_index(i);
|
|
96964
|
+
}
|
|
96965
|
+
if (!source_mask.RowIsValid(idx)) {
|
|
96966
|
+
continue;
|
|
96967
|
+
}
|
|
96968
|
+
total_list_size += VectorStringifiedListParser::CountParts(source_data[idx]);
|
|
96969
|
+
}
|
|
96970
|
+
|
|
96971
|
+
Vector varchar_vector(LogicalType::VARCHAR, total_list_size);
|
|
96972
|
+
|
|
96973
|
+
ListVector::Reserve(result, total_list_size);
|
|
96974
|
+
ListVector::SetListSize(result, total_list_size);
|
|
96975
|
+
|
|
96976
|
+
auto list_data = ListVector::GetData(result);
|
|
96977
|
+
auto child_data = FlatVector::GetData<string_t>(varchar_vector);
|
|
96978
|
+
|
|
96979
|
+
bool all_converted = true;
|
|
96980
|
+
idx_t total = 0;
|
|
96981
|
+
for (idx_t i = 0; i < count; i++) {
|
|
96982
|
+
idx_t idx = i;
|
|
96983
|
+
if (sel) {
|
|
96984
|
+
idx = sel->get_index(i);
|
|
96985
|
+
}
|
|
96986
|
+
if (!source_mask.RowIsValid(idx)) {
|
|
96987
|
+
result_mask.SetInvalid(i);
|
|
96988
|
+
continue;
|
|
96989
|
+
}
|
|
96990
|
+
|
|
96991
|
+
list_data[i].offset = total;
|
|
96992
|
+
auto valid =
|
|
96993
|
+
VectorStringifiedListParser::SplitStringifiedList(source_data[idx], child_data, total, varchar_vector);
|
|
96994
|
+
if (!valid) {
|
|
96995
|
+
string text = "Type VARCHAR with value '" + source_data[idx].GetString() +
|
|
96996
|
+
"' can't be cast to the destination type LIST";
|
|
96997
|
+
HandleVectorCastError::Operation<string_t>(text, result_mask, idx, parameters.error_message, all_converted);
|
|
96998
|
+
}
|
|
96999
|
+
list_data[i].length = total - list_data[i].offset; // length is the amount of parts coming from this string
|
|
97000
|
+
}
|
|
97001
|
+
D_ASSERT(total_list_size == total);
|
|
97002
|
+
|
|
97003
|
+
auto &result_child = ListVector::GetEntry(result);
|
|
97004
|
+
auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
|
|
97005
|
+
CastParameters child_parameters(parameters, cast_data.child_cast_info.cast_data.get());
|
|
97006
|
+
return cast_data.child_cast_info.function(varchar_vector, result_child, total_list_size, child_parameters) &&
|
|
97007
|
+
all_converted;
|
|
97008
|
+
}
|
|
97009
|
+
|
|
97010
|
+
bool StringListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
|
|
97011
|
+
D_ASSERT(source.GetType().id() == LogicalTypeId::VARCHAR);
|
|
97012
|
+
D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
|
|
97013
|
+
|
|
97014
|
+
switch (source.GetVectorType()) {
|
|
97015
|
+
case VectorType::CONSTANT_VECTOR: {
|
|
97016
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
97017
|
+
|
|
97018
|
+
auto source_data = ConstantVector::GetData<string_t>(source);
|
|
97019
|
+
auto &source_mask = ConstantVector::Validity(source);
|
|
97020
|
+
auto &result_mask = ConstantVector::Validity(result);
|
|
97021
|
+
|
|
97022
|
+
return StringListCastLoop(source_data, source_mask, result, result_mask, 1, parameters, nullptr);
|
|
97023
|
+
}
|
|
97024
|
+
default: {
|
|
97025
|
+
UnifiedVectorFormat unified_source;
|
|
97026
|
+
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
97027
|
+
|
|
97028
|
+
source.ToUnifiedFormat(count, unified_source);
|
|
97029
|
+
auto source_sel = unified_source.sel;
|
|
97030
|
+
auto source_data = (string_t *)unified_source.data;
|
|
97031
|
+
auto &source_mask = unified_source.validity;
|
|
97032
|
+
auto &result_mask = FlatVector::Validity(result);
|
|
97033
|
+
|
|
97034
|
+
return StringListCastLoop(source_data, source_mask, result, result_mask, count, parameters, source_sel);
|
|
97035
|
+
}
|
|
97036
|
+
}
|
|
97037
|
+
}
|
|
97038
|
+
|
|
97039
|
+
BoundCastInfo StringToListCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
97040
|
+
// second argument allows for a secondary casting function to be passed in the CastParameters
|
|
97041
|
+
return BoundCastInfo(&StringListCast,
|
|
97042
|
+
ListBoundCastData::BindListToListCast(input, LogicalType::LIST(LogicalType::VARCHAR), target));
|
|
97043
|
+
}
|
|
97044
|
+
|
|
96953
97045
|
BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const LogicalType &source,
|
|
96954
97046
|
const LogicalType &target) {
|
|
96955
|
-
// now switch on the target type
|
|
96956
97047
|
switch (target.id()) {
|
|
96957
97048
|
case LogicalTypeId::DATE:
|
|
96958
97049
|
return BoundCastInfo(&VectorCastHelpers::TryCastErrorLoop<string_t, date_t, duckdb::TryCastErrorMessage>);
|
|
@@ -96980,6 +97071,8 @@ BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const Logical
|
|
|
96980
97071
|
case LogicalTypeId::VARCHAR:
|
|
96981
97072
|
case LogicalTypeId::JSON:
|
|
96982
97073
|
return &DefaultCasts::ReinterpretCast;
|
|
97074
|
+
case LogicalTypeId::LIST:
|
|
97075
|
+
return StringToListCast(input, source, target);
|
|
96983
97076
|
default:
|
|
96984
97077
|
return VectorStringCastNumericSwitch(input, source, target);
|
|
96985
97078
|
}
|
|
@@ -97341,6 +97434,144 @@ BoundCastInfo DefaultCasts::UUIDCastSwitch(BindCastInput &input, const LogicalTy
|
|
|
97341
97434
|
} // namespace duckdb
|
|
97342
97435
|
|
|
97343
97436
|
|
|
97437
|
+
namespace duckdb {
|
|
97438
|
+
|
|
97439
|
+
struct CountPartOperation {
|
|
97440
|
+
idx_t count = 0;
|
|
97441
|
+
|
|
97442
|
+
void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
|
|
97443
|
+
count++;
|
|
97444
|
+
}
|
|
97445
|
+
};
|
|
97446
|
+
|
|
97447
|
+
struct SplitStringOperation {
|
|
97448
|
+
SplitStringOperation(string_t *child_data, idx_t &child_start, Vector &child)
|
|
97449
|
+
: child_data(child_data), child_start(child_start), child(child) {
|
|
97450
|
+
}
|
|
97451
|
+
|
|
97452
|
+
string_t *child_data;
|
|
97453
|
+
idx_t &child_start;
|
|
97454
|
+
Vector &child;
|
|
97455
|
+
|
|
97456
|
+
void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
|
|
97457
|
+
|
|
97458
|
+
if ((pos - start_pos) >= 4 && buf[start_pos] == 'N' && buf[start_pos + 1] == 'U' && buf[start_pos + 2] == 'L' &&
|
|
97459
|
+
buf[start_pos + 3] == 'L') {
|
|
97460
|
+
FlatVector::SetNull(child, child_start, true);
|
|
97461
|
+
child_start++;
|
|
97462
|
+
return;
|
|
97463
|
+
}
|
|
97464
|
+
child_data[child_start] = StringVector::AddString(child, buf + start_pos, pos - start_pos);
|
|
97465
|
+
child_start++;
|
|
97466
|
+
}
|
|
97467
|
+
};
|
|
97468
|
+
|
|
97469
|
+
static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) {
|
|
97470
|
+
char quote = buf[pos];
|
|
97471
|
+
pos++;
|
|
97472
|
+
|
|
97473
|
+
while (pos < len) {
|
|
97474
|
+
if (buf[pos] == quote) {
|
|
97475
|
+
return true;
|
|
97476
|
+
}
|
|
97477
|
+
pos++;
|
|
97478
|
+
}
|
|
97479
|
+
return false;
|
|
97480
|
+
}
|
|
97481
|
+
|
|
97482
|
+
static bool SkipToClose(idx_t &idx, const char *buf, idx_t &len, idx_t &lvl) {
|
|
97483
|
+
while (idx < len) {
|
|
97484
|
+
if (buf[idx] == '[') {
|
|
97485
|
+
if (!SkipToClose(++idx, buf, len, lvl)) {
|
|
97486
|
+
return false;
|
|
97487
|
+
}
|
|
97488
|
+
lvl++;
|
|
97489
|
+
idx++;
|
|
97490
|
+
}
|
|
97491
|
+
if (buf[idx] == '"' || buf[idx] == '\'') {
|
|
97492
|
+
SkipToCloseQuotes(idx, buf, len);
|
|
97493
|
+
}
|
|
97494
|
+
if (buf[idx] == ']') {
|
|
97495
|
+
lvl--;
|
|
97496
|
+
return true;
|
|
97497
|
+
}
|
|
97498
|
+
idx++;
|
|
97499
|
+
}
|
|
97500
|
+
return false;
|
|
97501
|
+
}
|
|
97502
|
+
|
|
97503
|
+
template <class OP>
|
|
97504
|
+
static bool SplitStringifiedListInternal(const string_t &input, OP &state) {
|
|
97505
|
+
const char *buf = input.GetDataUnsafe();
|
|
97506
|
+
idx_t len = input.GetSize();
|
|
97507
|
+
idx_t lvl = 1;
|
|
97508
|
+
idx_t pos = 0;
|
|
97509
|
+
|
|
97510
|
+
while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
|
|
97511
|
+
pos++;
|
|
97512
|
+
}
|
|
97513
|
+
if (pos == len || buf[pos] != '[') {
|
|
97514
|
+
return false;
|
|
97515
|
+
}
|
|
97516
|
+
pos++;
|
|
97517
|
+
while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
|
|
97518
|
+
pos++;
|
|
97519
|
+
}
|
|
97520
|
+
|
|
97521
|
+
idx_t start_pos = pos;
|
|
97522
|
+
while (pos < len) {
|
|
97523
|
+
if (buf[pos] == '[') {
|
|
97524
|
+
if (!SkipToClose(++pos, buf, len, ++lvl)) {
|
|
97525
|
+
return false;
|
|
97526
|
+
}
|
|
97527
|
+
} else if (buf[pos] == '"' || buf[pos] == '\'') {
|
|
97528
|
+
SkipToCloseQuotes(pos, buf, len);
|
|
97529
|
+
} else if (buf[pos] == ',' || buf[pos] == ']') {
|
|
97530
|
+
idx_t trailing_whitespace = 0;
|
|
97531
|
+
while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
|
|
97532
|
+
trailing_whitespace++;
|
|
97533
|
+
}
|
|
97534
|
+
if (!(buf[pos] == ']' && start_pos == (pos))) {
|
|
97535
|
+
state.HandleValue(buf, start_pos, pos - trailing_whitespace);
|
|
97536
|
+
} // else the list is empty
|
|
97537
|
+
if (buf[pos] == ']') {
|
|
97538
|
+
lvl--;
|
|
97539
|
+
break;
|
|
97540
|
+
}
|
|
97541
|
+
while (pos + 1 < len && StringUtil::CharacterIsSpace(buf[pos + 1])) {
|
|
97542
|
+
pos++;
|
|
97543
|
+
}
|
|
97544
|
+
start_pos = pos + 1;
|
|
97545
|
+
}
|
|
97546
|
+
pos++;
|
|
97547
|
+
}
|
|
97548
|
+
pos++;
|
|
97549
|
+
while (pos < len) {
|
|
97550
|
+
if (!StringUtil::CharacterIsSpace(buf[pos])) {
|
|
97551
|
+
return false;
|
|
97552
|
+
}
|
|
97553
|
+
pos++;
|
|
97554
|
+
}
|
|
97555
|
+
if (lvl != 0) {
|
|
97556
|
+
return false;
|
|
97557
|
+
}
|
|
97558
|
+
return true;
|
|
97559
|
+
}
|
|
97560
|
+
|
|
97561
|
+
bool VectorStringifiedListParser::SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start,
|
|
97562
|
+
Vector &child) {
|
|
97563
|
+
SplitStringOperation state(child_data, child_start, child);
|
|
97564
|
+
return SplitStringifiedListInternal<SplitStringOperation>(input, state);
|
|
97565
|
+
}
|
|
97566
|
+
|
|
97567
|
+
idx_t VectorStringifiedListParser::CountParts(const string_t &input) {
|
|
97568
|
+
CountPartOperation state;
|
|
97569
|
+
SplitStringifiedListInternal<CountPartOperation>(input, state);
|
|
97570
|
+
return state.count;
|
|
97571
|
+
}
|
|
97572
|
+
} // namespace duckdb
|
|
97573
|
+
|
|
97574
|
+
|
|
97344
97575
|
namespace duckdb {
|
|
97345
97576
|
|
|
97346
97577
|
//! The target type determines the preferred implicit casts
|
|
@@ -201589,6 +201820,7 @@ void DataTable::AppendLock(TableAppendState &state) {
|
|
|
201589
201820
|
throw TransactionException("Transaction conflict: adding entries to a table that has been altered!");
|
|
201590
201821
|
}
|
|
201591
201822
|
state.row_start = row_groups->GetTotalRows();
|
|
201823
|
+
state.current_row = state.row_start;
|
|
201592
201824
|
}
|
|
201593
201825
|
|
|
201594
201826
|
void DataTable::InitializeAppend(Transaction &transaction, TableAppendState &state, idx_t append_count) {
|
|
@@ -201652,9 +201884,6 @@ void DataTable::MergeStorage(RowGroupCollection &data, TableIndexList &indexes,
|
|
|
201652
201884
|
row_groups->MergeStorage(data);
|
|
201653
201885
|
stats.MergeStats(other_stats);
|
|
201654
201886
|
row_groups->Verify();
|
|
201655
|
-
if (!indexes.Empty()) {
|
|
201656
|
-
throw InternalException("FIXME: merge indexes");
|
|
201657
|
-
}
|
|
201658
201887
|
}
|
|
201659
201888
|
|
|
201660
201889
|
void DataTable::WriteToLog(WriteAheadLog &log, idx_t row_start, idx_t count) {
|
|
@@ -202149,7 +202378,7 @@ namespace duckdb {
|
|
|
202149
202378
|
// Local Table Storage
|
|
202150
202379
|
//===--------------------------------------------------------------------===//
|
|
202151
202380
|
LocalTableStorage::LocalTableStorage(DataTable &table)
|
|
202152
|
-
: table(table), allocator(Allocator::Get(table.db)), deleted_rows(0) {
|
|
202381
|
+
: table(&table), allocator(Allocator::Get(table.db)), deleted_rows(0) {
|
|
202153
202382
|
auto types = table.GetTypes();
|
|
202154
202383
|
row_groups = make_shared<RowGroupCollection>(table.info, TableIOManager::Get(table).GetBlockManagerForRowData(),
|
|
202155
202384
|
types, MAX_ROW_ID, 0);
|
|
@@ -202174,7 +202403,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
|
|
|
202174
202403
|
LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, idx_t changed_idx,
|
|
202175
202404
|
const LogicalType &target_type, const vector<column_t> &bound_columns,
|
|
202176
202405
|
Expression &cast_expr)
|
|
202177
|
-
: table(new_dt), allocator(Allocator::Get(table.db)), deleted_rows(parent.deleted_rows),
|
|
202406
|
+
: table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
|
|
202178
202407
|
partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
|
|
202179
202408
|
if (partial_manager) {
|
|
202180
202409
|
partial_manager->FlushPartialBlocks();
|
|
@@ -202187,7 +202416,7 @@ LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &paren
|
|
|
202187
202416
|
}
|
|
202188
202417
|
|
|
202189
202418
|
LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, idx_t drop_idx)
|
|
202190
|
-
: table(new_dt), allocator(Allocator::Get(table.db)), deleted_rows(parent.deleted_rows),
|
|
202419
|
+
: table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
|
|
202191
202420
|
partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
|
|
202192
202421
|
if (partial_manager) {
|
|
202193
202422
|
partial_manager->FlushPartialBlocks();
|
|
@@ -202200,9 +202429,9 @@ LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &paren
|
|
|
202200
202429
|
|
|
202201
202430
|
LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &parent, ColumnDefinition &new_column,
|
|
202202
202431
|
Expression *default_value)
|
|
202203
|
-
: table(new_dt), allocator(Allocator::Get(table.db)), deleted_rows(parent.deleted_rows),
|
|
202432
|
+
: table(&new_dt), allocator(Allocator::Get(table->db)), deleted_rows(parent.deleted_rows),
|
|
202204
202433
|
partial_manager(move(parent.partial_manager)), written_blocks(move(parent.written_blocks)) {
|
|
202205
|
-
idx_t new_column_idx = parent.table.column_definitions.size();
|
|
202434
|
+
idx_t new_column_idx = parent.table->column_definitions.size();
|
|
202206
202435
|
stats.InitializeAddColumn(parent.stats, new_column.GetType());
|
|
202207
202436
|
row_groups = parent.row_groups->AddColumn(new_column, default_value, stats.GetStats(new_column_idx));
|
|
202208
202437
|
parent.row_groups.reset();
|
|
@@ -202312,11 +202541,7 @@ void LocalStorage::Append(LocalAppendState &state, DataChunk &chunk) {
|
|
|
202312
202541
|
void LocalTableStorage::CheckFlushToDisk() {
|
|
202313
202542
|
// we finished writing a complete row group
|
|
202314
202543
|
// check if we should pre-emptively write it to disk
|
|
202315
|
-
if (table
|
|
202316
|
-
return;
|
|
202317
|
-
}
|
|
202318
|
-
if (!table.info->indexes.Empty()) {
|
|
202319
|
-
// we have indexes - we cannot merge
|
|
202544
|
+
if (table->info->IsTemporary() || StorageManager::GetStorageManager(table->db).InMemory()) {
|
|
202320
202545
|
return;
|
|
202321
202546
|
}
|
|
202322
202547
|
if (deleted_rows != 0) {
|
|
@@ -202326,7 +202551,7 @@ void LocalTableStorage::CheckFlushToDisk() {
|
|
|
202326
202551
|
// we should! write the second-to-last row group to disk
|
|
202327
202552
|
// allocate the partial block-manager if none is allocated yet
|
|
202328
202553
|
if (!partial_manager) {
|
|
202329
|
-
auto &block_manager = table.info->table_io_manager->GetBlockManagerForRowData();
|
|
202554
|
+
auto &block_manager = table->info->table_io_manager->GetBlockManagerForRowData();
|
|
202330
202555
|
partial_manager = make_unique<PartialBlockManager>(block_manager);
|
|
202331
202556
|
}
|
|
202332
202557
|
// flush second-to-last row group
|
|
@@ -202338,12 +202563,11 @@ void LocalTableStorage::FlushToDisk(RowGroup *row_group) {
|
|
|
202338
202563
|
// flush the specified row group
|
|
202339
202564
|
D_ASSERT(row_group);
|
|
202340
202565
|
D_ASSERT(deleted_rows == 0);
|
|
202341
|
-
D_ASSERT(table.info->indexes.Empty());
|
|
202342
202566
|
D_ASSERT(partial_manager);
|
|
202343
202567
|
//! The set of column compression types (if any)
|
|
202344
202568
|
vector<CompressionType> compression_types;
|
|
202345
202569
|
D_ASSERT(compression_types.empty());
|
|
202346
|
-
for (auto &column : table.column_definitions) {
|
|
202570
|
+
for (auto &column : table->column_definitions) {
|
|
202347
202571
|
compression_types.push_back(column.CompressionType());
|
|
202348
202572
|
}
|
|
202349
202573
|
auto row_group_pointer = row_group->WriteToDisk(*partial_manager, compression_types);
|
|
@@ -202410,24 +202634,23 @@ void LocalStorage::Update(DataTable *table, Vector &row_ids, const vector<column
|
|
|
202410
202634
|
}
|
|
202411
202635
|
|
|
202412
202636
|
template <class T>
|
|
202413
|
-
bool
|
|
202414
|
-
|
|
202415
|
-
|
|
202416
|
-
for (idx_t i = 0; i <
|
|
202417
|
-
|
|
202637
|
+
bool LocalTableStorage::ScanTableStorage(Transaction &transaction, const vector<column_t> &column_ids, T &&fun) {
|
|
202638
|
+
auto all_types = table->GetTypes();
|
|
202639
|
+
vector<LogicalType> scan_types;
|
|
202640
|
+
for (idx_t i = 0; i < column_ids.size(); i++) {
|
|
202641
|
+
scan_types.push_back(all_types[column_ids[i]]);
|
|
202418
202642
|
}
|
|
202419
|
-
|
|
202420
202643
|
DataChunk chunk;
|
|
202421
|
-
chunk.Initialize(
|
|
202644
|
+
chunk.Initialize(allocator, scan_types);
|
|
202422
202645
|
|
|
202423
202646
|
// initialize the scan
|
|
202424
202647
|
TableScanState state;
|
|
202425
202648
|
state.Initialize(column_ids, nullptr);
|
|
202426
|
-
|
|
202649
|
+
InitializeScan(state.local_state, nullptr);
|
|
202427
202650
|
|
|
202428
202651
|
while (true) {
|
|
202429
202652
|
chunk.Reset();
|
|
202430
|
-
|
|
202653
|
+
state.local_state.Scan(transaction, chunk);
|
|
202431
202654
|
if (chunk.size() == 0) {
|
|
202432
202655
|
return true;
|
|
202433
202656
|
}
|
|
@@ -202437,6 +202660,78 @@ bool LocalStorage::ScanTableStorage(DataTable &table, LocalTableStorage &storage
|
|
|
202437
202660
|
}
|
|
202438
202661
|
}
|
|
202439
202662
|
|
|
202663
|
+
template <class T>
|
|
202664
|
+
bool LocalTableStorage::ScanTableStorage(Transaction &transaction, T &&fun) {
|
|
202665
|
+
vector<column_t> column_ids;
|
|
202666
|
+
column_ids.reserve(table->column_definitions.size());
|
|
202667
|
+
for (idx_t i = 0; i < table->column_definitions.size(); i++) {
|
|
202668
|
+
column_ids.push_back(i);
|
|
202669
|
+
}
|
|
202670
|
+
return ScanTableStorage(transaction, column_ids, fun);
|
|
202671
|
+
}
|
|
202672
|
+
|
|
202673
|
+
void LocalTableStorage::AppendToIndexes(Transaction &transaction, TableAppendState &append_state, idx_t append_count,
|
|
202674
|
+
bool append_to_table) {
|
|
202675
|
+
bool constraint_violated = false;
|
|
202676
|
+
if (append_to_table) {
|
|
202677
|
+
table->InitializeAppend(transaction, append_state, append_count);
|
|
202678
|
+
}
|
|
202679
|
+
if (append_to_table) {
|
|
202680
|
+
// appending: need to scan entire
|
|
202681
|
+
ScanTableStorage(transaction, [&](DataChunk &chunk) -> bool {
|
|
202682
|
+
// append this chunk to the indexes of the table
|
|
202683
|
+
if (!table->AppendToIndexes(chunk, append_state.current_row)) {
|
|
202684
|
+
constraint_violated = true;
|
|
202685
|
+
return false;
|
|
202686
|
+
}
|
|
202687
|
+
// append to base table
|
|
202688
|
+
table->Append(chunk, append_state);
|
|
202689
|
+
return true;
|
|
202690
|
+
});
|
|
202691
|
+
} else {
|
|
202692
|
+
// only need to scan for index append
|
|
202693
|
+
// figure out which columns we need to scan for the set of indexes
|
|
202694
|
+
auto columns = table->info->indexes.GetRequiredColumns();
|
|
202695
|
+
// create an empty mock chunk that contains all the correct types for the table
|
|
202696
|
+
DataChunk mock_chunk;
|
|
202697
|
+
mock_chunk.InitializeEmpty(table->GetTypes());
|
|
202698
|
+
ScanTableStorage(transaction, columns, [&](DataChunk &chunk) -> bool {
|
|
202699
|
+
// construct the mock chunk by referencing the required columns
|
|
202700
|
+
for (idx_t i = 0; i < columns.size(); i++) {
|
|
202701
|
+
mock_chunk.data[columns[i]].Reference(chunk.data[i]);
|
|
202702
|
+
}
|
|
202703
|
+
mock_chunk.SetCardinality(chunk);
|
|
202704
|
+
// append this chunk to the indexes of the table
|
|
202705
|
+
if (!table->AppendToIndexes(mock_chunk, append_state.current_row)) {
|
|
202706
|
+
constraint_violated = true;
|
|
202707
|
+
return false;
|
|
202708
|
+
}
|
|
202709
|
+
append_state.current_row += chunk.size();
|
|
202710
|
+
return true;
|
|
202711
|
+
});
|
|
202712
|
+
}
|
|
202713
|
+
if (constraint_violated) {
|
|
202714
|
+
// need to revert the append
|
|
202715
|
+
row_t current_row = append_state.row_start;
|
|
202716
|
+
// remove the data from the indexes, if there are any indexes
|
|
202717
|
+
ScanTableStorage(transaction, [&](DataChunk &chunk) -> bool {
|
|
202718
|
+
// append this chunk to the indexes of the table
|
|
202719
|
+
table->RemoveFromIndexes(append_state, chunk, current_row);
|
|
202720
|
+
|
|
202721
|
+
current_row += chunk.size();
|
|
202722
|
+
if (current_row >= append_state.current_row) {
|
|
202723
|
+
// finished deleting all rows from the index: abort now
|
|
202724
|
+
return false;
|
|
202725
|
+
}
|
|
202726
|
+
return true;
|
|
202727
|
+
});
|
|
202728
|
+
if (append_to_table) {
|
|
202729
|
+
table->RevertAppendInternal(append_state.row_start, append_count);
|
|
202730
|
+
}
|
|
202731
|
+
throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
|
|
202732
|
+
}
|
|
202733
|
+
}
|
|
202734
|
+
|
|
202440
202735
|
void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
|
|
202441
202736
|
// bulk append threshold: a full row group
|
|
202442
202737
|
static constexpr const idx_t MERGE_THRESHOLD = RowGroup::ROW_GROUP_SIZE;
|
|
@@ -202452,10 +202747,17 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
|
|
|
202452
202747
|
TableAppendState append_state;
|
|
202453
202748
|
table.AppendLock(append_state);
|
|
202454
202749
|
if ((append_state.row_start == 0 || storage.row_groups->GetTotalRows() >= MERGE_THRESHOLD) &&
|
|
202455
|
-
storage.
|
|
202750
|
+
storage.deleted_rows == 0) {
|
|
202456
202751
|
// table is currently empty OR we are bulk appending: move over the storage directly
|
|
202457
202752
|
// first flush any out-standing storage nodes
|
|
202458
202753
|
storage.FlushToDisk();
|
|
202754
|
+
// now append to the indexes (if there are any)
|
|
202755
|
+
// FIXME: we should be able to merge the transaction-local index directly into the main table index
|
|
202756
|
+
// as long we just rewrite some row-ids
|
|
202757
|
+
if (!table.info->indexes.Empty()) {
|
|
202758
|
+
storage.AppendToIndexes(transaction, append_state, append_count, false);
|
|
202759
|
+
}
|
|
202760
|
+
// finally move over the row groups
|
|
202459
202761
|
table.MergeStorage(*storage.row_groups, storage.indexes, storage.stats);
|
|
202460
202762
|
} else {
|
|
202461
202763
|
if (storage.partial_manager || !storage.written_blocks.empty()) {
|
|
@@ -202463,36 +202765,8 @@ void LocalStorage::Flush(DataTable &table, LocalTableStorage &storage) {
|
|
|
202463
202765
|
// revert the data we have already written
|
|
202464
202766
|
storage.Rollback();
|
|
202465
202767
|
}
|
|
202466
|
-
|
|
202467
|
-
|
|
202468
|
-
ScanTableStorage(table, storage, [&](DataChunk &chunk) -> bool {
|
|
202469
|
-
// append this chunk to the indexes of the table
|
|
202470
|
-
if (!table.AppendToIndexes(chunk, append_state.current_row)) {
|
|
202471
|
-
constraint_violated = true;
|
|
202472
|
-
return false;
|
|
202473
|
-
}
|
|
202474
|
-
// append to base table
|
|
202475
|
-
table.Append(chunk, append_state);
|
|
202476
|
-
return true;
|
|
202477
|
-
});
|
|
202478
|
-
if (constraint_violated) {
|
|
202479
|
-
// need to revert the append
|
|
202480
|
-
row_t current_row = append_state.row_start;
|
|
202481
|
-
// remove the data from the indexes, if there are any indexes
|
|
202482
|
-
ScanTableStorage(table, storage, [&](DataChunk &chunk) -> bool {
|
|
202483
|
-
// append this chunk to the indexes of the table
|
|
202484
|
-
table.RemoveFromIndexes(append_state, chunk, current_row);
|
|
202485
|
-
|
|
202486
|
-
current_row += chunk.size();
|
|
202487
|
-
if (current_row >= append_state.current_row) {
|
|
202488
|
-
// finished deleting all rows from the index: abort now
|
|
202489
|
-
return false;
|
|
202490
|
-
}
|
|
202491
|
-
return true;
|
|
202492
|
-
});
|
|
202493
|
-
table.RevertAppendInternal(append_state.row_start, append_count);
|
|
202494
|
-
throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
|
|
202495
|
-
}
|
|
202768
|
+
// append to the indexes and append to the base table
|
|
202769
|
+
storage.AppendToIndexes(transaction, append_state, append_count, true);
|
|
202496
202770
|
}
|
|
202497
202771
|
transaction.PushAppend(&table, append_state.row_start, append_count);
|
|
202498
202772
|
}
|
|
@@ -202523,7 +202797,7 @@ void LocalTableStorage::Rollback() {
|
|
|
202523
202797
|
partial_manager->Clear();
|
|
202524
202798
|
partial_manager.reset();
|
|
202525
202799
|
}
|
|
202526
|
-
auto &block_manager = table.info->table_io_manager->GetBlockManagerForRowData();
|
|
202800
|
+
auto &block_manager = table->info->table_io_manager->GetBlockManagerForRowData();
|
|
202527
202801
|
for (auto block_id : written_blocks) {
|
|
202528
202802
|
block_manager.MarkBlockAsModified(block_id);
|
|
202529
202803
|
}
|
|
@@ -202545,6 +202819,7 @@ void LocalStorage::MoveStorage(DataTable *old_dt, DataTable *new_dt) {
|
|
|
202545
202819
|
}
|
|
202546
202820
|
// take over the storage from the old entry
|
|
202547
202821
|
auto new_storage = move(entry->second);
|
|
202822
|
+
new_storage->table = new_dt;
|
|
202548
202823
|
table_storage.erase(entry);
|
|
202549
202824
|
table_storage[new_dt] = move(new_storage);
|
|
202550
202825
|
}
|
|
@@ -202596,7 +202871,7 @@ void LocalStorage::FetchChunk(DataTable *table, Vector &row_ids, idx_t count, Da
|
|
|
202596
202871
|
|
|
202597
202872
|
ColumnFetchState fetch_state;
|
|
202598
202873
|
vector<column_t> col_ids;
|
|
202599
|
-
vector<LogicalType> types = storage->table.GetTypes();
|
|
202874
|
+
vector<LogicalType> types = storage->table->GetTypes();
|
|
202600
202875
|
for (idx_t i = 0; i < types.size(); i++) {
|
|
202601
202876
|
col_ids.push_back(i);
|
|
202602
202877
|
}
|
|
@@ -210911,6 +211186,22 @@ void TableIndexList::VerifyForeignKey(const vector<idx_t> &fk_keys, bool is_appe
|
|
|
210911
211186
|
}
|
|
210912
211187
|
}
|
|
210913
211188
|
|
|
211189
|
+
vector<column_t> TableIndexList::GetRequiredColumns() {
|
|
211190
|
+
lock_guard<mutex> lock(indexes_lock);
|
|
211191
|
+
set<column_t> unique_indexes;
|
|
211192
|
+
for (auto &index : indexes) {
|
|
211193
|
+
for (auto col_index : index->column_ids) {
|
|
211194
|
+
unique_indexes.insert(col_index);
|
|
211195
|
+
}
|
|
211196
|
+
}
|
|
211197
|
+
vector<column_t> result;
|
|
211198
|
+
result.reserve(unique_indexes.size());
|
|
211199
|
+
for (auto column_index : unique_indexes) {
|
|
211200
|
+
result.emplace_back(column_index);
|
|
211201
|
+
}
|
|
211202
|
+
return result;
|
|
211203
|
+
}
|
|
211204
|
+
|
|
210914
211205
|
vector<BlockPointer> TableIndexList::SerializeIndexes(duckdb::MetaBlockWriter &writer) {
|
|
210915
211206
|
vector<BlockPointer> blocks_info;
|
|
210916
211207
|
for (auto &index : indexes) {
|