duckdb 0.4.1-dev1225.0 → 0.4.1-dev1254.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +237 -103
- package/src/duckdb.hpp +16 -5
- package/src/parquet-amalgamation.cpp +34564 -34564
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -8230,10 +8230,10 @@ SequenceException::SequenceException(const string &msg) : Exception(ExceptionTyp
|
|
|
8230
8230
|
InterruptException::InterruptException() : Exception(ExceptionType::INTERRUPT, "Interrupted!") {
|
|
8231
8231
|
}
|
|
8232
8232
|
|
|
8233
|
-
FatalException::FatalException(const string &msg) : Exception(
|
|
8233
|
+
FatalException::FatalException(ExceptionType type, const string &msg) : Exception(type, msg) {
|
|
8234
8234
|
}
|
|
8235
8235
|
|
|
8236
|
-
InternalException::InternalException(const string &msg) :
|
|
8236
|
+
InternalException::InternalException(const string &msg) : FatalException(ExceptionType::INTERNAL, msg) {
|
|
8237
8237
|
}
|
|
8238
8238
|
|
|
8239
8239
|
InvalidInputException::InvalidInputException(const string &msg) : Exception(ExceptionType::INVALID_INPUT, msg) {
|
|
@@ -118897,6 +118897,11 @@ unique_ptr<DataChunk> ClientContext::FetchInternal(ClientContextLock &lock, Exec
|
|
|
118897
118897
|
// standard exceptions do not invalidate the current transaction
|
|
118898
118898
|
result.error = ex.what();
|
|
118899
118899
|
invalidate_query = false;
|
|
118900
|
+
} catch (FatalException &ex) {
|
|
118901
|
+
// fatal exceptions invalidate the entire database
|
|
118902
|
+
result.error = ex.what();
|
|
118903
|
+
auto &db = DatabaseInstance::GetDatabase(*this);
|
|
118904
|
+
db.Invalidate();
|
|
118900
118905
|
} catch (std::exception &ex) {
|
|
118901
118906
|
result.error = ex.what();
|
|
118902
118907
|
} catch (...) { // LCOV_EXCL_START
|
|
@@ -118910,6 +118915,10 @@ unique_ptr<DataChunk> ClientContext::FetchInternal(ClientContextLock &lock, Exec
|
|
|
118910
118915
|
void ClientContext::BeginTransactionInternal(ClientContextLock &lock, bool requires_valid_transaction) {
|
|
118911
118916
|
// check if we are on AutoCommit. In this case we should start a transaction
|
|
118912
118917
|
D_ASSERT(!active_query);
|
|
118918
|
+
auto &db = DatabaseInstance::GetDatabase(*this);
|
|
118919
|
+
if (db.IsInvalidated()) {
|
|
118920
|
+
throw FatalException("Failed: database has been invalidated!");
|
|
118921
|
+
}
|
|
118913
118922
|
if (requires_valid_transaction && transaction.HasActiveTransaction() &&
|
|
118914
118923
|
transaction.ActiveTransaction().IsInvalidated()) {
|
|
118915
118924
|
throw Exception("Failed: transaction has been invalidated!");
|
|
@@ -118958,6 +118967,10 @@ string ClientContext::EndQueryInternal(ClientContextLock &lock, bool success, bo
|
|
|
118958
118967
|
ActiveTransaction().Invalidate();
|
|
118959
118968
|
}
|
|
118960
118969
|
}
|
|
118970
|
+
} catch (FatalException &ex) {
|
|
118971
|
+
auto &db = DatabaseInstance::GetDatabase(*this);
|
|
118972
|
+
db.Invalidate();
|
|
118973
|
+
error = ex.what();
|
|
118961
118974
|
} catch (std::exception &ex) {
|
|
118962
118975
|
error = ex.what();
|
|
118963
118976
|
} catch (...) { // LCOV_EXCL_START
|
|
@@ -119401,7 +119414,17 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
|
|
|
119401
119414
|
shared_ptr<PreparedStatementData> &prepared, PendingQueryParameters parameters) {
|
|
119402
119415
|
unique_ptr<PendingQueryResult> result;
|
|
119403
119416
|
|
|
119404
|
-
|
|
119417
|
+
try {
|
|
119418
|
+
BeginQueryInternal(lock, query);
|
|
119419
|
+
} catch (FatalException &ex) {
|
|
119420
|
+
// fatal exceptions invalidate the entire database
|
|
119421
|
+
auto &db = DatabaseInstance::GetDatabase(*this);
|
|
119422
|
+
db.Invalidate();
|
|
119423
|
+
result = make_unique<PendingQueryResult>(ex.what());
|
|
119424
|
+
return result;
|
|
119425
|
+
} catch (std::exception &ex) {
|
|
119426
|
+
return make_unique<PendingQueryResult>(ex.what());
|
|
119427
|
+
}
|
|
119405
119428
|
// start the profiler
|
|
119406
119429
|
auto &profiler = QueryProfiler::Get(*this);
|
|
119407
119430
|
profiler.StartQuery(query, IsExplainAnalyze(statement ? statement.get() : prepared->unbound_statement.get()));
|
|
@@ -119425,6 +119448,11 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
|
|
|
119425
119448
|
// standard exceptions do not invalidate the current transaction
|
|
119426
119449
|
result = make_unique<PendingQueryResult>(ex.what());
|
|
119427
119450
|
invalidate_query = false;
|
|
119451
|
+
} catch (FatalException &ex) {
|
|
119452
|
+
// fatal exceptions invalidate the entire database
|
|
119453
|
+
auto &db = DatabaseInstance::GetDatabase(*this);
|
|
119454
|
+
db.Invalidate();
|
|
119455
|
+
result = make_unique<PendingQueryResult>(ex.what());
|
|
119428
119456
|
} catch (std::exception &ex) {
|
|
119429
119457
|
// other types of exceptions do invalidate the current transaction
|
|
119430
119458
|
result = make_unique<PendingQueryResult>(ex.what());
|
|
@@ -119902,6 +119930,10 @@ void ClientContext::RunFunctionInTransactionInternal(ClientContextLock &lock, co
|
|
|
119902
119930
|
transaction.Rollback();
|
|
119903
119931
|
}
|
|
119904
119932
|
throw;
|
|
119933
|
+
} catch (FatalException &ex) {
|
|
119934
|
+
auto &db = DatabaseInstance::GetDatabase(*this);
|
|
119935
|
+
db.Invalidate();
|
|
119936
|
+
throw;
|
|
119905
119937
|
} catch (std::exception &ex) {
|
|
119906
119938
|
if (require_new_transaction) {
|
|
119907
119939
|
transaction.Rollback();
|
|
@@ -121405,6 +121437,13 @@ string ClientConfig::ExtractTimezoneFromConfig(ClientConfig &config) {
|
|
|
121405
121437
|
}
|
|
121406
121438
|
}
|
|
121407
121439
|
|
|
121440
|
+
void DatabaseInstance::Invalidate() {
|
|
121441
|
+
this->is_invalidated = true;
|
|
121442
|
+
}
|
|
121443
|
+
bool DatabaseInstance::IsInvalidated() {
|
|
121444
|
+
return this->is_invalidated;
|
|
121445
|
+
}
|
|
121446
|
+
|
|
121408
121447
|
} // namespace duckdb
|
|
121409
121448
|
|
|
121410
121449
|
|
|
@@ -183064,6 +183103,13 @@ public:
|
|
|
183064
183103
|
return FindMinimumBitWidth<T, BYTE_ALIGNED>(values, count);
|
|
183065
183104
|
}
|
|
183066
183105
|
|
|
183106
|
+
// Calculates the minimum required number of bits per value that can store all values,
|
|
183107
|
+
// given a predetermined minimum and maximum value of the buffer
|
|
183108
|
+
template <class T>
|
|
183109
|
+
inline static bitpacking_width_t MinimumBitWidth(T minimum, T maximum) {
|
|
183110
|
+
return FindMinimumBitWidth<T, BYTE_ALIGNED>(minimum, maximum);
|
|
183111
|
+
}
|
|
183112
|
+
|
|
183067
183113
|
template <class T>
|
|
183068
183114
|
inline static idx_t GetRequiredSize(idx_t count, bitpacking_width_t width) {
|
|
183069
183115
|
count = RoundUpToAlgorithmGroupSize(count);
|
|
@@ -183152,6 +183198,18 @@ private:
|
|
|
183152
183198
|
}
|
|
183153
183199
|
}
|
|
183154
183200
|
|
|
183201
|
+
// Sign bit extension
|
|
183202
|
+
template <class T, class T_U = typename std::make_unsigned<T>::type>
|
|
183203
|
+
static void SignExtend(data_ptr_t dst, bitpacking_width_t width) {
|
|
183204
|
+
T const mask = ((T_U)1) << (width - 1);
|
|
183205
|
+
for (idx_t i = 0; i < BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE; ++i) {
|
|
183206
|
+
T value = Load<T>(dst + i * sizeof(T));
|
|
183207
|
+
value = value & ((((T_U)1) << width) - ((T_U)1));
|
|
183208
|
+
T result = (value ^ mask) - mask;
|
|
183209
|
+
Store(result, dst + i * sizeof(T));
|
|
183210
|
+
}
|
|
183211
|
+
}
|
|
183212
|
+
|
|
183155
183213
|
template <class T>
|
|
183156
183214
|
static void UnPackGroup(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width,
|
|
183157
183215
|
bool skip_sign_extension = false) {
|
|
@@ -183175,33 +183233,14 @@ private:
|
|
|
183175
183233
|
// Prevent compression at widths that are ineffective
|
|
183176
183234
|
template <class T>
|
|
183177
183235
|
static bitpacking_width_t GetEffectiveWidth(bitpacking_width_t width) {
|
|
183178
|
-
|
|
183179
|
-
|
|
183180
|
-
|
|
183181
|
-
|
|
183182
|
-
if (width > 28 && (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value)) {
|
|
183183
|
-
return 32;
|
|
183184
|
-
}
|
|
183185
|
-
|
|
183186
|
-
else if (width > 14 && (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value)) {
|
|
183187
|
-
return 16;
|
|
183236
|
+
auto bits_of_type = sizeof(T) * 8;
|
|
183237
|
+
auto type_size = sizeof(T);
|
|
183238
|
+
if (width + type_size > bits_of_type) {
|
|
183239
|
+
return bits_of_type;
|
|
183188
183240
|
}
|
|
183189
|
-
|
|
183190
183241
|
return width;
|
|
183191
183242
|
}
|
|
183192
183243
|
|
|
183193
|
-
// Sign bit extension
|
|
183194
|
-
template <class T, class T_U = typename std::make_unsigned<T>::type>
|
|
183195
|
-
static void SignExtend(data_ptr_t dst, bitpacking_width_t width) {
|
|
183196
|
-
T const mask = ((T_U)1) << (width - 1);
|
|
183197
|
-
for (idx_t i = 0; i < BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE; ++i) {
|
|
183198
|
-
T value = Load<T>(dst + i * sizeof(T));
|
|
183199
|
-
value = value & ((((T_U)1) << width) - ((T_U)1));
|
|
183200
|
-
T result = (value ^ mask) - mask;
|
|
183201
|
-
Store(result, dst + i * sizeof(T));
|
|
183202
|
-
}
|
|
183203
|
-
}
|
|
183204
|
-
|
|
183205
183244
|
template <class T>
|
|
183206
183245
|
static void PackGroup(data_ptr_t dst, T *values, bitpacking_width_t width) {
|
|
183207
183246
|
if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) {
|
|
@@ -183465,17 +183504,19 @@ private:
|
|
|
183465
183504
|
|
|
183466
183505
|
|
|
183467
183506
|
|
|
183507
|
+
|
|
183468
183508
|
#include <functional>
|
|
183469
183509
|
|
|
183470
183510
|
namespace duckdb {
|
|
183471
183511
|
|
|
183472
183512
|
// Note that optimizations in scanning only work if this value is equal to STANDARD_VECTOR_SIZE, however we keep them
|
|
183473
183513
|
// separated to prevent the code from break on lower vector sizes
|
|
183474
|
-
static constexpr const idx_t
|
|
183514
|
+
static constexpr const idx_t BITPACKING_METADATA_GROUP_SIZE = 1024;
|
|
183475
183515
|
|
|
183476
183516
|
struct EmptyBitpackingWriter {
|
|
183477
183517
|
template <class T>
|
|
183478
|
-
static void Operation(T *values, bool *validity, bitpacking_width_t width, idx_t count,
|
|
183518
|
+
static void Operation(T *values, bool *validity, bitpacking_width_t width, T frame_of_reference, idx_t count,
|
|
183519
|
+
void *data_ptr) {
|
|
183479
183520
|
}
|
|
183480
183521
|
};
|
|
183481
183522
|
|
|
@@ -183483,39 +183524,95 @@ template <class T>
|
|
|
183483
183524
|
struct BitpackingState {
|
|
183484
183525
|
public:
|
|
183485
183526
|
BitpackingState() : compression_buffer_idx(0), total_size(0), data_ptr(nullptr) {
|
|
183527
|
+
ResetMinMax();
|
|
183486
183528
|
}
|
|
183487
183529
|
|
|
183488
|
-
T compression_buffer[
|
|
183489
|
-
bool compression_buffer_validity[
|
|
183530
|
+
T compression_buffer[BITPACKING_METADATA_GROUP_SIZE];
|
|
183531
|
+
bool compression_buffer_validity[BITPACKING_METADATA_GROUP_SIZE];
|
|
183490
183532
|
idx_t compression_buffer_idx;
|
|
183491
183533
|
idx_t total_size;
|
|
183492
183534
|
void *data_ptr;
|
|
183493
183535
|
|
|
183536
|
+
bool min_max_set;
|
|
183537
|
+
T minimum;
|
|
183538
|
+
T maximum;
|
|
183539
|
+
|
|
183494
183540
|
public:
|
|
183495
|
-
|
|
183541
|
+
void SubtractFrameOfReference(const T &frame_of_reference) {
|
|
183542
|
+
for (idx_t i = 0; i < compression_buffer_idx; i++) {
|
|
183543
|
+
compression_buffer[i] -= frame_of_reference;
|
|
183544
|
+
}
|
|
183545
|
+
}
|
|
183546
|
+
|
|
183547
|
+
void ResetMinMax() {
|
|
183548
|
+
min_max_set = false;
|
|
183549
|
+
//! We set these to 0, in case all values are NULL, in which case the min and max will never be set.
|
|
183550
|
+
minimum = 0;
|
|
183551
|
+
maximum = 0;
|
|
183552
|
+
}
|
|
183553
|
+
|
|
183554
|
+
bool TryUpdateMinMax(T value) {
|
|
183555
|
+
bool updated = false;
|
|
183556
|
+
if (!min_max_set || value < minimum) {
|
|
183557
|
+
minimum = value;
|
|
183558
|
+
updated = true;
|
|
183559
|
+
}
|
|
183560
|
+
if (!min_max_set || value > maximum) {
|
|
183561
|
+
maximum = value;
|
|
183562
|
+
updated = true;
|
|
183563
|
+
}
|
|
183564
|
+
min_max_set = min_max_set || updated;
|
|
183565
|
+
//! Only when either of the values are updated, do we need to test the overflow
|
|
183566
|
+
if (updated) {
|
|
183567
|
+
T ignore;
|
|
183568
|
+
return TrySubtractOperator::Operation(maximum, minimum, ignore);
|
|
183569
|
+
}
|
|
183570
|
+
return true;
|
|
183571
|
+
}
|
|
183572
|
+
|
|
183573
|
+
T GetFrameOfReference() {
|
|
183574
|
+
return minimum;
|
|
183575
|
+
}
|
|
183576
|
+
T Maximum() {
|
|
183577
|
+
return maximum;
|
|
183578
|
+
}
|
|
183579
|
+
|
|
183580
|
+
template <class OP, class T_U = typename std::make_unsigned<T>::type>
|
|
183496
183581
|
void Flush() {
|
|
183497
|
-
|
|
183498
|
-
|
|
183499
|
-
|
|
183582
|
+
T frame_of_reference = GetFrameOfReference();
|
|
183583
|
+
SubtractFrameOfReference(frame_of_reference);
|
|
183584
|
+
|
|
183585
|
+
//! Because of FOR, we can guarantee that all values are positive
|
|
183586
|
+
T_U adjusted_maximum = T_U(Maximum() - frame_of_reference);
|
|
183587
|
+
|
|
183588
|
+
bitpacking_width_t width = BitpackingPrimitives::MinimumBitWidth<T_U>((T_U)0, adjusted_maximum);
|
|
183589
|
+
OP::template Operation<T>(compression_buffer, compression_buffer_validity, width, frame_of_reference,
|
|
183590
|
+
compression_buffer_idx, data_ptr);
|
|
183591
|
+
total_size += (BITPACKING_METADATA_GROUP_SIZE * width) / 8 + sizeof(bitpacking_width_t) + sizeof(T);
|
|
183500
183592
|
compression_buffer_idx = 0;
|
|
183593
|
+
ResetMinMax();
|
|
183501
183594
|
}
|
|
183502
183595
|
|
|
183503
183596
|
template <class OP = EmptyBitpackingWriter>
|
|
183504
|
-
|
|
183597
|
+
bool Update(T *data, ValidityMask &validity, idx_t idx) {
|
|
183505
183598
|
|
|
183506
183599
|
if (validity.RowIsValid(idx)) {
|
|
183507
183600
|
compression_buffer_validity[compression_buffer_idx] = true;
|
|
183508
183601
|
compression_buffer[compression_buffer_idx++] = data[idx];
|
|
183602
|
+
if (!TryUpdateMinMax(data[idx])) {
|
|
183603
|
+
return false;
|
|
183604
|
+
}
|
|
183509
183605
|
} else {
|
|
183510
183606
|
// We write zero for easy bitwidth analysis of the compression buffer later
|
|
183511
183607
|
compression_buffer_validity[compression_buffer_idx] = false;
|
|
183512
183608
|
compression_buffer[compression_buffer_idx++] = 0;
|
|
183513
183609
|
}
|
|
183514
183610
|
|
|
183515
|
-
if (compression_buffer_idx ==
|
|
183611
|
+
if (compression_buffer_idx == BITPACKING_METADATA_GROUP_SIZE) {
|
|
183516
183612
|
// Calculate bitpacking width;
|
|
183517
183613
|
Flush<OP>();
|
|
183518
183614
|
}
|
|
183615
|
+
return true;
|
|
183519
183616
|
}
|
|
183520
183617
|
};
|
|
183521
183618
|
|
|
@@ -183541,9 +183638,10 @@ bool BitpackingAnalyze(AnalyzeState &state, Vector &input, idx_t count) {
|
|
|
183541
183638
|
auto data = (T *)vdata.data;
|
|
183542
183639
|
for (idx_t i = 0; i < count; i++) {
|
|
183543
183640
|
auto idx = vdata.sel->get_index(i);
|
|
183544
|
-
analyze_state.state.template Update<EmptyBitpackingWriter>(data, vdata.validity, idx)
|
|
183641
|
+
if (!analyze_state.state.template Update<EmptyBitpackingWriter>(data, vdata.validity, idx)) {
|
|
183642
|
+
return false;
|
|
183643
|
+
}
|
|
183545
183644
|
}
|
|
183546
|
-
|
|
183547
183645
|
return true;
|
|
183548
183646
|
}
|
|
183549
183647
|
|
|
@@ -183577,19 +183675,25 @@ public:
|
|
|
183577
183675
|
|
|
183578
183676
|
// Ptr to next free spot in segment;
|
|
183579
183677
|
data_ptr_t data_ptr;
|
|
183580
|
-
// Ptr to next free spot for storing bitwidths (growing downwards).
|
|
183581
|
-
data_ptr_t
|
|
183678
|
+
// Ptr to next free spot for storing bitwidths and frame-of-references (growing downwards).
|
|
183679
|
+
data_ptr_t metadata_ptr;
|
|
183582
183680
|
|
|
183583
183681
|
BitpackingState<T> state;
|
|
183584
183682
|
|
|
183585
183683
|
public:
|
|
183586
183684
|
struct BitpackingWriter {
|
|
183685
|
+
|
|
183587
183686
|
template <class VALUE_TYPE>
|
|
183588
|
-
static void Operation(VALUE_TYPE *values, bool *validity, bitpacking_width_t width,
|
|
183589
|
-
void *data_ptr) {
|
|
183687
|
+
static void Operation(VALUE_TYPE *values, bool *validity, bitpacking_width_t width,
|
|
183688
|
+
VALUE_TYPE frame_of_reference, idx_t count, void *data_ptr) {
|
|
183590
183689
|
auto state = (BitpackingCompressState<T> *)data_ptr;
|
|
183690
|
+
auto total_bits_needed = (width * BITPACKING_METADATA_GROUP_SIZE);
|
|
183691
|
+
D_ASSERT(total_bits_needed % 8 == 0);
|
|
183692
|
+
auto total_bytes_needed = total_bits_needed / 8;
|
|
183693
|
+
total_bytes_needed += sizeof(bitpacking_width_t);
|
|
183694
|
+
total_bytes_needed += sizeof(VALUE_TYPE);
|
|
183591
183695
|
|
|
183592
|
-
if (state->RemainingSize() <
|
|
183696
|
+
if (state->RemainingSize() < total_bytes_needed) {
|
|
183593
183697
|
// Segment is full
|
|
183594
183698
|
auto row_start = state->current_segment->start + state->current_segment->count;
|
|
183595
183699
|
state->FlushSegment();
|
|
@@ -183598,17 +183702,17 @@ public:
|
|
|
183598
183702
|
|
|
183599
183703
|
for (idx_t i = 0; i < count; i++) {
|
|
183600
183704
|
if (validity[i]) {
|
|
183601
|
-
NumericStatistics::Update<T>(state->current_segment->stats, values[i]);
|
|
183705
|
+
NumericStatistics::Update<T>(state->current_segment->stats, values[i] + frame_of_reference);
|
|
183602
183706
|
}
|
|
183603
183707
|
}
|
|
183604
183708
|
|
|
183605
|
-
state->WriteValues(values, width, count);
|
|
183709
|
+
state->WriteValues(values, width, frame_of_reference, count);
|
|
183606
183710
|
}
|
|
183607
183711
|
};
|
|
183608
183712
|
|
|
183609
|
-
// Space remaining between the
|
|
183713
|
+
// Space remaining between the metadata_ptr growing down and data ptr growing up
|
|
183610
183714
|
idx_t RemainingSize() {
|
|
183611
|
-
return
|
|
183715
|
+
return metadata_ptr - data_ptr;
|
|
183612
183716
|
}
|
|
183613
183717
|
|
|
183614
183718
|
void CreateEmptySegment(idx_t row_start) {
|
|
@@ -183621,7 +183725,8 @@ public:
|
|
|
183621
183725
|
handle = buffer_manager.Pin(current_segment->block);
|
|
183622
183726
|
|
|
183623
183727
|
data_ptr = handle.Ptr() + current_segment->GetBlockOffset() + BitpackingPrimitives::BITPACKING_HEADER_SIZE;
|
|
183624
|
-
|
|
183728
|
+
metadata_ptr =
|
|
183729
|
+
handle.Ptr() + current_segment->GetBlockOffset() + Storage::BLOCK_SIZE - sizeof(bitpacking_width_t);
|
|
183625
183730
|
}
|
|
183626
183731
|
|
|
183627
183732
|
void Append(UnifiedVectorFormat &vdata, idx_t count) {
|
|
@@ -183634,13 +183739,15 @@ public:
|
|
|
183634
183739
|
}
|
|
183635
183740
|
}
|
|
183636
183741
|
|
|
183637
|
-
void WriteValues(T *values, bitpacking_width_t width, idx_t count) {
|
|
183638
|
-
// TODO we can optimize this by stopping early if count <
|
|
183742
|
+
void WriteValues(T *values, bitpacking_width_t width, T frame_of_reference, idx_t count) {
|
|
183743
|
+
// TODO we can optimize this by stopping early if count < BITPACKING_METADATA_GROUP_SIZE
|
|
183639
183744
|
BitpackingPrimitives::PackBuffer<T, false>(data_ptr, values, count, width);
|
|
183640
|
-
data_ptr += (
|
|
183745
|
+
data_ptr += (BITPACKING_METADATA_GROUP_SIZE * width) / 8;
|
|
183641
183746
|
|
|
183642
|
-
Store<bitpacking_width_t>(width,
|
|
183643
|
-
|
|
183747
|
+
Store<bitpacking_width_t>(width, metadata_ptr);
|
|
183748
|
+
metadata_ptr -= sizeof(T);
|
|
183749
|
+
Store<T>(frame_of_reference, metadata_ptr);
|
|
183750
|
+
metadata_ptr -= sizeof(bitpacking_width_t);
|
|
183644
183751
|
|
|
183645
183752
|
current_segment->count += count;
|
|
183646
183753
|
}
|
|
@@ -183649,14 +183756,14 @@ public:
|
|
|
183649
183756
|
auto &state = checkpointer.GetCheckpointState();
|
|
183650
183757
|
auto dataptr = handle.Ptr();
|
|
183651
183758
|
|
|
183652
|
-
// Compact the segment by moving the
|
|
183653
|
-
idx_t
|
|
183654
|
-
idx_t
|
|
183655
|
-
idx_t total_segment_size =
|
|
183656
|
-
memmove(dataptr +
|
|
183759
|
+
// Compact the segment by moving the metadata next to the data.
|
|
183760
|
+
idx_t metadata_offset = AlignValue(data_ptr - dataptr);
|
|
183761
|
+
idx_t metadata_size = dataptr + Storage::BLOCK_SIZE - metadata_ptr - 1;
|
|
183762
|
+
idx_t total_segment_size = metadata_offset + metadata_size;
|
|
183763
|
+
memmove(dataptr + metadata_offset, metadata_ptr + 1, metadata_size);
|
|
183657
183764
|
|
|
183658
|
-
// Store the offset of the first
|
|
183659
|
-
Store<idx_t>(
|
|
183765
|
+
// Store the offset of the metadata of the first group (which is at the highest address).
|
|
183766
|
+
Store<idx_t>(metadata_offset + metadata_size - 1, dataptr);
|
|
183660
183767
|
handle.Destroy();
|
|
183661
183768
|
|
|
183662
183769
|
state.FlushSegment(move(current_segment), total_segment_size);
|
|
@@ -183699,14 +183806,14 @@ public:
|
|
|
183699
183806
|
auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
|
|
183700
183807
|
handle = buffer_manager.Pin(segment.block);
|
|
183701
183808
|
auto dataptr = handle.Ptr();
|
|
183702
|
-
|
|
183809
|
+
current_metadata_group_ptr = dataptr + segment.GetBlockOffset() + BitpackingPrimitives::BITPACKING_HEADER_SIZE;
|
|
183703
183810
|
|
|
183704
183811
|
// load offset to bitpacking widths pointer
|
|
183705
|
-
auto
|
|
183706
|
-
|
|
183812
|
+
auto bitpacking_metadata_offset = Load<idx_t>(dataptr + segment.GetBlockOffset());
|
|
183813
|
+
bitpacking_metadata_ptr = dataptr + segment.GetBlockOffset() + bitpacking_metadata_offset;
|
|
183707
183814
|
|
|
183708
|
-
// load the
|
|
183709
|
-
|
|
183815
|
+
// load the metadata of the first vector
|
|
183816
|
+
LoadCurrentMetaData();
|
|
183710
183817
|
}
|
|
183711
183818
|
|
|
183712
183819
|
BufferHandle handle;
|
|
@@ -183715,32 +183822,37 @@ public:
|
|
|
183715
183822
|
T decompression_buffer[BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE];
|
|
183716
183823
|
|
|
183717
183824
|
idx_t position_in_group = 0;
|
|
183718
|
-
data_ptr_t
|
|
183719
|
-
data_ptr_t
|
|
183825
|
+
data_ptr_t current_metadata_group_ptr;
|
|
183826
|
+
data_ptr_t bitpacking_metadata_ptr;
|
|
183720
183827
|
bitpacking_width_t current_width;
|
|
183828
|
+
T current_frame_of_reference;
|
|
183721
183829
|
|
|
183722
183830
|
public:
|
|
183723
|
-
|
|
183724
|
-
|
|
183725
|
-
|
|
183831
|
+
//! Loads the current group header, and sets pointer to next header
|
|
183832
|
+
void LoadCurrentMetaData() {
|
|
183833
|
+
D_ASSERT(bitpacking_metadata_ptr > handle.Ptr() &&
|
|
183834
|
+
bitpacking_metadata_ptr < handle.Ptr() + Storage::BLOCK_SIZE);
|
|
183835
|
+
current_width = Load<bitpacking_width_t>(bitpacking_metadata_ptr);
|
|
183836
|
+
bitpacking_metadata_ptr -= sizeof(T);
|
|
183837
|
+
current_frame_of_reference = Load<T>(bitpacking_metadata_ptr);
|
|
183838
|
+
bitpacking_metadata_ptr -= sizeof(bitpacking_width_t);
|
|
183726
183839
|
LoadDecompressFunction();
|
|
183727
183840
|
}
|
|
183728
183841
|
|
|
183729
183842
|
void Skip(ColumnSegment &segment, idx_t skip_count) {
|
|
183730
183843
|
while (skip_count > 0) {
|
|
183731
|
-
if (position_in_group + skip_count <
|
|
183844
|
+
if (position_in_group + skip_count < BITPACKING_METADATA_GROUP_SIZE) {
|
|
183732
183845
|
// We're not leaving this bitpacking group, we can perform all skips.
|
|
183733
183846
|
position_in_group += skip_count;
|
|
183734
183847
|
break;
|
|
183735
183848
|
} else {
|
|
183736
183849
|
// The skip crosses the current bitpacking group, we skip the remainder of this group.
|
|
183737
|
-
auto skipping =
|
|
183850
|
+
auto skipping = BITPACKING_METADATA_GROUP_SIZE - position_in_group;
|
|
183738
183851
|
position_in_group = 0;
|
|
183739
|
-
|
|
183852
|
+
current_metadata_group_ptr += (current_width * BITPACKING_METADATA_GROUP_SIZE) / 8;
|
|
183740
183853
|
|
|
183741
|
-
//
|
|
183742
|
-
|
|
183743
|
-
LoadCurrentBitWidth();
|
|
183854
|
+
// Load new width
|
|
183855
|
+
LoadCurrentMetaData();
|
|
183744
183856
|
|
|
183745
183857
|
skip_count -= skipping;
|
|
183746
183858
|
}
|
|
@@ -183758,6 +183870,16 @@ unique_ptr<SegmentScanState> BitpackingInitScan(ColumnSegment &segment) {
|
|
|
183758
183870
|
return move(result);
|
|
183759
183871
|
}
|
|
183760
183872
|
|
|
183873
|
+
template <class T>
|
|
183874
|
+
static void ApplyFrameOfReference(T *dst, T frame_of_reference, idx_t size) {
|
|
183875
|
+
if (!frame_of_reference) {
|
|
183876
|
+
return;
|
|
183877
|
+
}
|
|
183878
|
+
for (idx_t i = 0; i < size; i++) {
|
|
183879
|
+
dst[i] += frame_of_reference;
|
|
183880
|
+
}
|
|
183881
|
+
}
|
|
183882
|
+
|
|
183761
183883
|
//===--------------------------------------------------------------------===//
|
|
183762
183884
|
// Scan base data
|
|
183763
183885
|
//===--------------------------------------------------------------------===//
|
|
@@ -183770,31 +183892,28 @@ void BitpackingScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t
|
|
|
183770
183892
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
183771
183893
|
|
|
183772
183894
|
// Fast path for when no compression was used, we can do a single memcopy
|
|
183773
|
-
if (STANDARD_VECTOR_SIZE ==
|
|
183774
|
-
if (scan_state.current_width == sizeof(T) * 8 &&
|
|
183775
|
-
scan_state.position_in_group == 0) {
|
|
183776
|
-
|
|
183777
|
-
memcpy(result_data + result_offset, scan_state.
|
|
183778
|
-
scan_state.
|
|
183779
|
-
scan_state.
|
|
183780
|
-
scan_state.LoadCurrentBitWidth();
|
|
183895
|
+
if (STANDARD_VECTOR_SIZE == BITPACKING_METADATA_GROUP_SIZE) {
|
|
183896
|
+
if (scan_state.current_frame_of_reference == 0 && scan_state.current_width == sizeof(T) * 8 &&
|
|
183897
|
+
scan_count <= BITPACKING_METADATA_GROUP_SIZE && scan_state.position_in_group == 0) {
|
|
183898
|
+
|
|
183899
|
+
memcpy(result_data + result_offset, scan_state.current_metadata_group_ptr, scan_count * sizeof(T));
|
|
183900
|
+
scan_state.current_metadata_group_ptr += scan_count * sizeof(T);
|
|
183901
|
+
scan_state.LoadCurrentMetaData();
|
|
183781
183902
|
return;
|
|
183782
183903
|
}
|
|
183783
183904
|
}
|
|
183784
183905
|
|
|
183785
|
-
|
|
183786
|
-
|
|
183787
|
-
bool skip_sign_extend = std::is_signed<T>::value && nstats.min >= 0;
|
|
183906
|
+
//! Because FOR offsets all our values to be 0 or above, we can always skip sign extension here
|
|
183907
|
+
bool skip_sign_extend = true;
|
|
183788
183908
|
|
|
183789
183909
|
idx_t scanned = 0;
|
|
183790
183910
|
|
|
183791
183911
|
while (scanned < scan_count) {
|
|
183792
|
-
// Exhausted this
|
|
183793
|
-
if (scan_state.position_in_group >=
|
|
183912
|
+
// Exhausted this metadata group, move pointers to next group and load metadata for next group.
|
|
183913
|
+
if (scan_state.position_in_group >= BITPACKING_METADATA_GROUP_SIZE) {
|
|
183794
183914
|
scan_state.position_in_group = 0;
|
|
183795
|
-
scan_state.
|
|
183796
|
-
scan_state.
|
|
183797
|
-
scan_state.LoadCurrentBitWidth();
|
|
183915
|
+
scan_state.current_metadata_group_ptr += (scan_state.current_width * BITPACKING_METADATA_GROUP_SIZE) / 8;
|
|
183916
|
+
scan_state.LoadCurrentMetaData();
|
|
183798
183917
|
}
|
|
183799
183918
|
|
|
183800
183919
|
idx_t offset_in_compression_group =
|
|
@@ -183805,7 +183924,7 @@ void BitpackingScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t
|
|
|
183805
183924
|
|
|
183806
183925
|
// Calculate start of compression algorithm group
|
|
183807
183926
|
data_ptr_t current_position_ptr =
|
|
183808
|
-
scan_state.
|
|
183927
|
+
scan_state.current_metadata_group_ptr + scan_state.position_in_group * scan_state.current_width / 8;
|
|
183809
183928
|
data_ptr_t decompression_group_start_pointer =
|
|
183810
183929
|
current_position_ptr - offset_in_compression_group * scan_state.current_width / 8;
|
|
183811
183930
|
|
|
@@ -183824,7 +183943,7 @@ void BitpackingScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t
|
|
|
183824
183943
|
memcpy(current_result_ptr, scan_state.decompression_buffer + offset_in_compression_group,
|
|
183825
183944
|
to_scan * sizeof(T));
|
|
183826
183945
|
}
|
|
183827
|
-
|
|
183946
|
+
ApplyFrameOfReference((T *)current_result_ptr, scan_state.current_frame_of_reference, to_scan);
|
|
183828
183947
|
scanned += to_scan;
|
|
183829
183948
|
scan_state.position_in_group += to_scan;
|
|
183830
183949
|
}
|
|
@@ -183851,16 +183970,18 @@ void BitpackingFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t r
|
|
|
183851
183970
|
scan_state.position_in_group % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE;
|
|
183852
183971
|
|
|
183853
183972
|
data_ptr_t decompression_group_start_pointer =
|
|
183854
|
-
scan_state.
|
|
183973
|
+
scan_state.current_metadata_group_ptr +
|
|
183855
183974
|
(scan_state.position_in_group - offset_in_compression_group) * scan_state.current_width / 8;
|
|
183856
183975
|
|
|
183857
|
-
|
|
183858
|
-
bool skip_sign_extend =
|
|
183976
|
+
//! Because FOR offsets all our values to be 0 or above, we can always skip sign extension here
|
|
183977
|
+
bool skip_sign_extend = true;
|
|
183859
183978
|
|
|
183860
183979
|
scan_state.decompress_function((data_ptr_t)scan_state.decompression_buffer, decompression_group_start_pointer,
|
|
183861
183980
|
scan_state.current_width, skip_sign_extend);
|
|
183862
183981
|
|
|
183863
183982
|
*current_result_ptr = *(T *)(scan_state.decompression_buffer + offset_in_compression_group);
|
|
183983
|
+
//! Apply FOR to result
|
|
183984
|
+
*current_result_ptr += scan_state.current_frame_of_reference;
|
|
183864
183985
|
}
|
|
183865
183986
|
template <class T>
|
|
183866
183987
|
void BitpackingSkip(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count) {
|
|
@@ -190334,7 +190455,7 @@ string ValidityStatistics::ToString() const {
|
|
|
190334
190455
|
|
|
190335
190456
|
namespace duckdb {
|
|
190336
190457
|
|
|
190337
|
-
const uint64_t VERSION_NUMBER =
|
|
190458
|
+
const uint64_t VERSION_NUMBER = 38;
|
|
190338
190459
|
|
|
190339
190460
|
} // namespace duckdb
|
|
190340
190461
|
|
|
@@ -191789,7 +191910,8 @@ void ColumnDataCheckpointer::ScanSegments(const std::function<void(Vector &, idx
|
|
|
191789
191910
|
}
|
|
191790
191911
|
}
|
|
191791
191912
|
|
|
191792
|
-
|
|
191913
|
+
CompressionType ForceCompression(vector<CompressionFunction *> &compression_functions,
|
|
191914
|
+
CompressionType compression_type) {
|
|
191793
191915
|
// On of the force_compression flags has been set
|
|
191794
191916
|
// check if this compression method is available
|
|
191795
191917
|
bool found = false;
|
|
@@ -191802,25 +191924,31 @@ void ForceCompression(vector<CompressionFunction *> &compression_functions, Comp
|
|
|
191802
191924
|
if (found) {
|
|
191803
191925
|
// the force_compression method is available
|
|
191804
191926
|
// clear all other compression methods
|
|
191927
|
+
// except the uncompressed method, so we can fall back on that
|
|
191805
191928
|
for (idx_t i = 0; i < compression_functions.size(); i++) {
|
|
191929
|
+
if (compression_functions[i]->type == CompressionType::COMPRESSION_UNCOMPRESSED) {
|
|
191930
|
+
continue;
|
|
191931
|
+
}
|
|
191806
191932
|
if (compression_functions[i]->type != compression_type) {
|
|
191807
191933
|
compression_functions[i] = nullptr;
|
|
191808
191934
|
}
|
|
191809
191935
|
}
|
|
191810
191936
|
}
|
|
191937
|
+
return found ? compression_type : CompressionType::COMPRESSION_AUTO;
|
|
191811
191938
|
}
|
|
191812
191939
|
|
|
191813
191940
|
unique_ptr<AnalyzeState> ColumnDataCheckpointer::DetectBestCompressionMethod(idx_t &compression_idx) {
|
|
191814
191941
|
D_ASSERT(!compression_functions.empty());
|
|
191815
191942
|
auto &config = DBConfig::GetConfig(GetDatabase());
|
|
191943
|
+
CompressionType forced_method = CompressionType::COMPRESSION_AUTO;
|
|
191816
191944
|
|
|
191817
191945
|
auto compression_type = checkpoint_info.compression_type;
|
|
191818
191946
|
if (compression_type != CompressionType::COMPRESSION_AUTO) {
|
|
191819
|
-
ForceCompression(compression_functions, compression_type);
|
|
191947
|
+
forced_method = ForceCompression(compression_functions, compression_type);
|
|
191820
191948
|
}
|
|
191821
191949
|
if (compression_type == CompressionType::COMPRESSION_AUTO &&
|
|
191822
191950
|
config.options.force_compression != CompressionType::COMPRESSION_AUTO) {
|
|
191823
|
-
ForceCompression(compression_functions, config.options.force_compression);
|
|
191951
|
+
forced_method = ForceCompression(compression_functions, config.options.force_compression);
|
|
191824
191952
|
}
|
|
191825
191953
|
// set up the analyze states for each compression method
|
|
191826
191954
|
vector<unique_ptr<AnalyzeState>> analyze_states;
|
|
@@ -191858,12 +191986,18 @@ unique_ptr<AnalyzeState> ColumnDataCheckpointer::DetectBestCompressionMethod(idx
|
|
|
191858
191986
|
if (!compression_functions[i]) {
|
|
191859
191987
|
continue;
|
|
191860
191988
|
}
|
|
191989
|
+
//! Check if the method type is the forced method (if forced is used)
|
|
191990
|
+
bool forced_method_found = compression_functions[i]->type == forced_method;
|
|
191861
191991
|
auto score = compression_functions[i]->final_analyze(*analyze_states[i]);
|
|
191862
|
-
if (score < best_score) {
|
|
191992
|
+
if (score < best_score || forced_method_found) {
|
|
191863
191993
|
compression_idx = i;
|
|
191864
191994
|
best_score = score;
|
|
191865
191995
|
state = move(analyze_states[i]);
|
|
191866
191996
|
}
|
|
191997
|
+
//! If we have found the forced method, we're done
|
|
191998
|
+
if (forced_method_found) {
|
|
191999
|
+
break;
|
|
192000
|
+
}
|
|
191867
192001
|
}
|
|
191868
192002
|
return state;
|
|
191869
192003
|
}
|
|
@@ -191892,7 +192026,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
|
|
|
191892
192026
|
auto analyze_state = DetectBestCompressionMethod(compression_idx);
|
|
191893
192027
|
|
|
191894
192028
|
if (!analyze_state) {
|
|
191895
|
-
throw
|
|
192029
|
+
throw FatalException("No suitable compression/storage method found to store column");
|
|
191896
192030
|
}
|
|
191897
192031
|
|
|
191898
192032
|
// now that we have analyzed the compression functions we can start writing to disk
|