duckdb 0.8.2-dev1862.0 → 0.8.2-dev1968.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +9 -9
- package/package.json +1 -1
- package/src/duckdb/src/common/enum_util.cpp +28 -0
- package/src/duckdb/src/common/types/hugeint.cpp +40 -0
- package/src/duckdb/src/core_functions/function_list.cpp +1 -0
- package/src/duckdb/src/core_functions/scalar/string/to_base.cpp +66 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +14 -11
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +6 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +14 -12
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +6 -4
- package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +8 -6
- package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +2 -2
- package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -3
- package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +7 -5
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +7 -5
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +5 -4
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +13 -6
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +7 -5
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -5
- package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +7 -4
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +8 -6
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -5
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +7 -5
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +8 -6
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +8 -7
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +8 -6
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +11 -9
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +10 -10
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -2
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +7 -6
- package/src/duckdb/src/execution/physical_operator.cpp +3 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +70 -55
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +5 -1
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/limits.hpp +52 -149
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +48 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +9 -0
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +11 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -3
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +3 -2
- package/src/duckdb/src/main/relation/join_relation.cpp +1 -1
- package/src/duckdb/src/parallel/pipeline.cpp +0 -17
- package/src/duckdb/src/parallel/pipeline_executor.cpp +26 -7
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +55 -1
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +13 -3
- package/src/duckdb/src/storage/compression/bitpacking.cpp +87 -63
- package/src/duckdb/src/storage/compression/bitpacking_hugeint.cpp +295 -0
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
- package/src/duckdb/ub_src_storage_compression.cpp +2 -0
@@ -9,8 +9,11 @@
|
|
9
9
|
#include "duckdb/storage/table/column_data_checkpointer.hpp"
|
10
10
|
#include "duckdb/storage/table/column_segment.hpp"
|
11
11
|
#include "duckdb/common/operator/subtract.hpp"
|
12
|
+
#include "duckdb/common/operator/multiply.hpp"
|
13
|
+
#include "duckdb/common/operator/add.hpp"
|
12
14
|
#include "duckdb/storage/compression/bitpacking.hpp"
|
13
15
|
#include "duckdb/storage/table/scan_state.hpp"
|
16
|
+
#include "duckdb/common/numeric_utils.hpp"
|
14
17
|
|
15
18
|
#include <functional>
|
16
19
|
|
@@ -77,11 +80,11 @@ struct EmptyBitpackingWriter {
|
|
77
80
|
template <class T>
|
78
81
|
static void WriteConstant(T constant, idx_t count, void *data_ptr, bool all_invalid) {
|
79
82
|
}
|
80
|
-
template <class T, class T_S = typename
|
83
|
+
template <class T, class T_S = typename MakeSigned<T>::type>
|
81
84
|
static void WriteConstantDelta(T_S constant, T frame_of_reference, idx_t count, T *values, bool *validity,
|
82
85
|
void *data_ptr) {
|
83
86
|
}
|
84
|
-
template <class T, class T_S = typename
|
87
|
+
template <class T, class T_S = typename MakeSigned<T>::type>
|
85
88
|
static void WriteDeltaFor(T *values, bool *validity, bitpacking_width_t width, T frame_of_reference,
|
86
89
|
T_S delta_offset, T *original_values, idx_t count, void *data_ptr) {
|
87
90
|
}
|
@@ -91,11 +94,11 @@ struct EmptyBitpackingWriter {
|
|
91
94
|
}
|
92
95
|
};
|
93
96
|
|
94
|
-
template <class T, class
|
97
|
+
template <class T, class T_S = typename MakeSigned<T>::type>
|
95
98
|
struct BitpackingState {
|
96
99
|
public:
|
97
100
|
BitpackingState() : compression_buffer_idx(0), total_size(0), data_ptr(nullptr) {
|
98
|
-
compression_buffer_internal[0] = (
|
101
|
+
compression_buffer_internal[0] = T(0);
|
99
102
|
compression_buffer = &compression_buffer_internal[1];
|
100
103
|
Reset();
|
101
104
|
}
|
@@ -151,7 +154,7 @@ public:
|
|
151
154
|
void CalculateDeltaStats() {
|
152
155
|
// TODO: currently we don't support delta compression of values above NumericLimits<T_S>::Maximum(),
|
153
156
|
// we could support this with some clever subtract trickery?
|
154
|
-
if (maximum > (
|
157
|
+
if (maximum > static_cast<T>(NumericLimits<T_S>::Maximum())) {
|
155
158
|
return;
|
156
159
|
}
|
157
160
|
|
@@ -172,21 +175,25 @@ public:
|
|
172
175
|
// Note: since we don't allow any values over NumericLimits<T_S>::Maximum(), all subtractions for unsigned types
|
173
176
|
// are guaranteed not to overflow
|
174
177
|
bool can_do_all = true;
|
175
|
-
if (
|
178
|
+
if (NumericLimits<T>::IsSigned()) {
|
176
179
|
T_S bogus;
|
177
|
-
can_do_all = TrySubtractOperator::Operation(
|
178
|
-
TrySubtractOperator::Operation(
|
180
|
+
can_do_all = TrySubtractOperator::Operation(static_cast<T_S>(minimum), static_cast<T_S>(maximum), bogus) &&
|
181
|
+
TrySubtractOperator::Operation(static_cast<T_S>(maximum), static_cast<T_S>(minimum), bogus);
|
179
182
|
}
|
180
183
|
|
181
184
|
// Calculate deltas
|
185
|
+
// compression_buffer pointer points one element ahead of the internal buffer making the use of signed index
|
186
|
+
// integer (-1) possible
|
187
|
+
D_ASSERT(compression_buffer_idx <= NumericLimits<int64_t>::Maximum());
|
182
188
|
if (can_do_all) {
|
183
|
-
for (int64_t i = 0; i < (
|
184
|
-
delta_buffer[i] = (
|
189
|
+
for (int64_t i = 0; i < static_cast<int64_t>(compression_buffer_idx); i++) {
|
190
|
+
delta_buffer[i] = static_cast<T_S>(compression_buffer[i]) - static_cast<T_S>(compression_buffer[i - 1]);
|
185
191
|
}
|
186
192
|
} else {
|
187
|
-
for (int64_t i = 0; i < (
|
188
|
-
auto success =
|
189
|
-
|
193
|
+
for (int64_t i = 0; i < static_cast<int64_t>(compression_buffer_idx); i++) {
|
194
|
+
auto success =
|
195
|
+
TrySubtractOperator::Operation(static_cast<T_S>(compression_buffer[i]),
|
196
|
+
static_cast<T_S>(compression_buffer[i - 1]), delta_buffer[i]);
|
190
197
|
if (!success) {
|
191
198
|
return;
|
192
199
|
}
|
@@ -195,7 +202,7 @@ public:
|
|
195
202
|
|
196
203
|
can_do_delta = true;
|
197
204
|
|
198
|
-
for (
|
205
|
+
for (idx_t i = 1; i < compression_buffer_idx; i++) {
|
199
206
|
maximum_delta = MaxValue<T_S>(maximum_delta, delta_buffer[i]);
|
200
207
|
minimum_delta = MinValue<T_S>(minimum_delta, delta_buffer[i]);
|
201
208
|
}
|
@@ -205,15 +212,15 @@ public:
|
|
205
212
|
delta_buffer[0] = minimum_delta;
|
206
213
|
|
207
214
|
can_do_delta = can_do_delta && TrySubtractOperator::Operation(maximum_delta, minimum_delta, min_max_delta_diff);
|
208
|
-
can_do_delta =
|
209
|
-
|
215
|
+
can_do_delta = can_do_delta && TrySubtractOperator::Operation(static_cast<T_S>(compression_buffer[0]),
|
216
|
+
minimum_delta, delta_offset);
|
210
217
|
}
|
211
218
|
|
212
219
|
template <class T_INNER>
|
213
220
|
void SubtractFrameOfReference(T_INNER *buffer, T_INNER frame_of_reference) {
|
214
|
-
static_assert(
|
221
|
+
static_assert(IsIntegral<T_INNER>::value, "Integral type required.");
|
215
222
|
for (idx_t i = 0; i < compression_buffer_idx; i++) {
|
216
|
-
buffer[i] -=
|
223
|
+
buffer[i] -= frame_of_reference;
|
217
224
|
}
|
218
225
|
}
|
219
226
|
|
@@ -234,23 +241,28 @@ public:
|
|
234
241
|
|
235
242
|
if (can_do_delta) {
|
236
243
|
if (maximum_delta == minimum_delta && mode != BitpackingMode::FOR && mode != BitpackingMode::DELTA_FOR) {
|
237
|
-
|
238
|
-
|
239
|
-
|
244
|
+
// FOR needs to be T (considering hugeint is bigger than idx_t)
|
245
|
+
T frame_of_reference = compression_buffer[0];
|
246
|
+
|
247
|
+
OP::WriteConstantDelta(maximum_delta, static_cast<T>(frame_of_reference), compression_buffer_idx,
|
248
|
+
compression_buffer, compression_buffer_validity, data_ptr);
|
240
249
|
total_size += sizeof(T) + sizeof(T) + sizeof(bitpacking_metadata_encoded_t);
|
241
250
|
return true;
|
242
251
|
}
|
243
252
|
|
244
253
|
// Check if delta has benefit
|
245
|
-
|
254
|
+
// bitwidth is calculated differently between signed and unsigned values, but considering we do not have
|
255
|
+
// an unsigned version of hugeint, we need to explicitly specify (through boolean) that we wish to calculate
|
256
|
+
// the unsigned minimum bit-width instead of relying on MakeUnsigned and IsSigned
|
257
|
+
auto delta_required_bitwidth = BitpackingPrimitives::MinimumBitWidth<T, false>(min_max_delta_diff);
|
246
258
|
auto regular_required_bitwidth = BitpackingPrimitives::MinimumBitWidth(min_max_diff);
|
247
259
|
|
248
260
|
if (delta_required_bitwidth < regular_required_bitwidth && mode != BitpackingMode::FOR) {
|
249
261
|
SubtractFrameOfReference(delta_buffer, minimum_delta);
|
250
262
|
|
251
|
-
OP::WriteDeltaFor(
|
252
|
-
|
253
|
-
data_ptr);
|
263
|
+
OP::WriteDeltaFor(reinterpret_cast<T *>(delta_buffer), compression_buffer_validity,
|
264
|
+
delta_required_bitwidth, static_cast<T>(minimum_delta), delta_offset,
|
265
|
+
compression_buffer, compression_buffer_idx, data_ptr);
|
254
266
|
|
255
267
|
total_size += BitpackingPrimitives::GetRequiredSize(compression_buffer_idx, delta_required_bitwidth);
|
256
268
|
total_size += sizeof(T); // FOR value
|
@@ -262,7 +274,7 @@ public:
|
|
262
274
|
}
|
263
275
|
|
264
276
|
if (can_do_for) {
|
265
|
-
auto width = BitpackingPrimitives::MinimumBitWidth<
|
277
|
+
auto width = BitpackingPrimitives::MinimumBitWidth<T, false>(min_max_diff);
|
266
278
|
SubtractFrameOfReference(compression_buffer, minimum);
|
267
279
|
OP::WriteFor(compression_buffer, compression_buffer_validity, width, minimum, compression_buffer_idx,
|
268
280
|
data_ptr);
|
@@ -320,7 +332,7 @@ unique_ptr<AnalyzeState> BitpackingInitAnalyze(ColumnData &col_data, PhysicalTyp
|
|
320
332
|
|
321
333
|
template <class T>
|
322
334
|
bool BitpackingAnalyze(AnalyzeState &state, Vector &input, idx_t count) {
|
323
|
-
auto &analyze_state =
|
335
|
+
auto &analyze_state = static_cast<BitpackingAnalyzeState<T> &>(state);
|
324
336
|
UnifiedVectorFormat vdata;
|
325
337
|
input.ToUnifiedFormat(count, vdata);
|
326
338
|
|
@@ -336,7 +348,7 @@ bool BitpackingAnalyze(AnalyzeState &state, Vector &input, idx_t count) {
|
|
336
348
|
|
337
349
|
template <class T>
|
338
350
|
idx_t BitpackingFinalAnalyze(AnalyzeState &state) {
|
339
|
-
auto &bitpacking_state =
|
351
|
+
auto &bitpacking_state = static_cast<BitpackingAnalyzeState<T> &>(state);
|
340
352
|
auto flush_result = bitpacking_state.state.template Flush<EmptyBitpackingWriter>();
|
341
353
|
if (!flush_result) {
|
342
354
|
return DConstants::INVALID_INDEX;
|
@@ -347,7 +359,7 @@ idx_t BitpackingFinalAnalyze(AnalyzeState &state) {
|
|
347
359
|
//===--------------------------------------------------------------------===//
|
348
360
|
// Compress
|
349
361
|
//===--------------------------------------------------------------------===//
|
350
|
-
template <class T, bool WRITE_STATISTICS, class T_S = typename
|
362
|
+
template <class T, bool WRITE_STATISTICS, class T_S = typename MakeSigned<T>::type>
|
351
363
|
struct BitpackingCompressState : public CompressionState {
|
352
364
|
public:
|
353
365
|
explicit BitpackingCompressState(ColumnDataCheckpointer &checkpointer)
|
@@ -355,7 +367,7 @@ public:
|
|
355
367
|
function(checkpointer.GetCompressionFunction(CompressionType::COMPRESSION_BITPACKING)) {
|
356
368
|
CreateEmptySegment(checkpointer.GetRowGroup().start);
|
357
369
|
|
358
|
-
state.data_ptr =
|
370
|
+
state.data_ptr = reinterpret_cast<void *>(this);
|
359
371
|
|
360
372
|
auto &config = DBConfig::GetConfig(checkpointer.GetDatabase());
|
361
373
|
state.mode = config.options.force_bitpacking_mode;
|
@@ -376,7 +388,7 @@ public:
|
|
376
388
|
public:
|
377
389
|
struct BitpackingWriter {
|
378
390
|
static void WriteConstant(T constant, idx_t count, void *data_ptr, bool all_invalid) {
|
379
|
-
auto state =
|
391
|
+
auto state = reinterpret_cast<BitpackingCompressState<T, WRITE_STATISTICS> *>(data_ptr);
|
380
392
|
|
381
393
|
ReserveSpace(state, sizeof(T));
|
382
394
|
WriteMetaData(state, BitpackingMode::CONSTANT);
|
@@ -387,7 +399,7 @@ public:
|
|
387
399
|
|
388
400
|
static void WriteConstantDelta(T_S constant, T frame_of_reference, idx_t count, T *values, bool *validity,
|
389
401
|
void *data_ptr) {
|
390
|
-
auto state =
|
402
|
+
auto state = reinterpret_cast<BitpackingCompressState<T, WRITE_STATISTICS> *>(data_ptr);
|
391
403
|
|
392
404
|
ReserveSpace(state, 2 * sizeof(T));
|
393
405
|
WriteMetaData(state, BitpackingMode::CONSTANT_DELTA);
|
@@ -396,17 +408,16 @@ public:
|
|
396
408
|
|
397
409
|
UpdateStats(state, count);
|
398
410
|
}
|
399
|
-
|
400
411
|
static void WriteDeltaFor(T *values, bool *validity, bitpacking_width_t width, T frame_of_reference,
|
401
412
|
T_S delta_offset, T *original_values, idx_t count, void *data_ptr) {
|
402
|
-
auto state =
|
413
|
+
auto state = reinterpret_cast<BitpackingCompressState<T, WRITE_STATISTICS> *>(data_ptr);
|
403
414
|
|
404
415
|
auto bp_size = BitpackingPrimitives::GetRequiredSize(count, width);
|
405
416
|
ReserveSpace(state, bp_size + 3 * sizeof(T));
|
406
417
|
|
407
418
|
WriteMetaData(state, BitpackingMode::DELTA_FOR);
|
408
419
|
WriteData(state->data_ptr, frame_of_reference);
|
409
|
-
WriteData(state->data_ptr, (
|
420
|
+
WriteData(state->data_ptr, static_cast<T>(width));
|
410
421
|
WriteData(state->data_ptr, delta_offset);
|
411
422
|
|
412
423
|
BitpackingPrimitives::PackBuffer<T, false>(state->data_ptr, values, count, width);
|
@@ -417,7 +428,7 @@ public:
|
|
417
428
|
|
418
429
|
static void WriteFor(T *values, bool *validity, bitpacking_width_t width, T frame_of_reference, idx_t count,
|
419
430
|
void *data_ptr) {
|
420
|
-
auto state =
|
431
|
+
auto state = reinterpret_cast<BitpackingCompressState<T, WRITE_STATISTICS> *>(data_ptr);
|
421
432
|
|
422
433
|
auto bp_size = BitpackingPrimitives::GetRequiredSize(count, width);
|
423
434
|
ReserveSpace(state, bp_size + 2 * sizeof(T));
|
@@ -434,7 +445,7 @@ public:
|
|
434
445
|
|
435
446
|
template <class T_OUT>
|
436
447
|
static void WriteData(data_ptr_t &ptr, T_OUT val) {
|
437
|
-
*
|
448
|
+
*reinterpret_cast<T_OUT *>(ptr) = val;
|
438
449
|
ptr += sizeof(T_OUT);
|
439
450
|
}
|
440
451
|
|
@@ -485,7 +496,7 @@ public:
|
|
485
496
|
auto data = UnifiedVectorFormat::GetData<T>(vdata);
|
486
497
|
|
487
498
|
for (idx_t i = 0; i < count; i++) {
|
488
|
-
|
499
|
+
idx_t idx = vdata.sel->get_index(i);
|
489
500
|
state.template Update<BitpackingCompressState<T, WRITE_STATISTICS, T_S>::BitpackingWriter>(
|
490
501
|
data[idx], vdata.validity.RowIsValid(idx));
|
491
502
|
}
|
@@ -493,7 +504,7 @@ public:
|
|
493
504
|
|
494
505
|
void FlushAndCreateSegmentIfFull(idx_t required_data_bytes, idx_t required_meta_bytes) {
|
495
506
|
if (!CanStore(required_data_bytes, required_meta_bytes)) {
|
496
|
-
|
507
|
+
idx_t row_start = current_segment->start + current_segment->count;
|
497
508
|
FlushSegment();
|
498
509
|
CreateEmptySegment(row_start);
|
499
510
|
}
|
@@ -537,7 +548,7 @@ unique_ptr<CompressionState> BitpackingInitCompression(ColumnDataCheckpointer &c
|
|
537
548
|
|
538
549
|
template <class T, bool WRITE_STATISTICS>
|
539
550
|
void BitpackingCompress(CompressionState &state_p, Vector &scan_vector, idx_t count) {
|
540
|
-
auto &state =
|
551
|
+
auto &state = static_cast<BitpackingCompressState<T, WRITE_STATISTICS> &>(state_p);
|
541
552
|
UnifiedVectorFormat vdata;
|
542
553
|
scan_vector.ToUnifiedFormat(count, vdata);
|
543
554
|
state.Append(vdata, count);
|
@@ -545,7 +556,7 @@ void BitpackingCompress(CompressionState &state_p, Vector &scan_vector, idx_t co
|
|
545
556
|
|
546
557
|
template <class T, bool WRITE_STATISTICS>
|
547
558
|
void BitpackingFinalizeCompress(CompressionState &state_p) {
|
548
|
-
auto &state =
|
559
|
+
auto &state = static_cast<BitpackingCompressState<T, WRITE_STATISTICS> &>(state_p);
|
549
560
|
state.Finalize();
|
550
561
|
}
|
551
562
|
|
@@ -588,7 +599,7 @@ static T DeltaDecode(T *data, T previous_value, const size_t size) {
|
|
588
599
|
return data[size - 1];
|
589
600
|
}
|
590
601
|
|
591
|
-
template <class T, class T_S = typename
|
602
|
+
template <class T, class T_S = typename MakeSigned<T>::type>
|
592
603
|
struct BitpackingScanState : public SegmentScanState {
|
593
604
|
public:
|
594
605
|
explicit BitpackingScanState(ColumnSegment &segment) : current_segment(segment) {
|
@@ -629,7 +640,7 @@ public:
|
|
629
640
|
D_ASSERT(bitpacking_metadata_ptr > handle.Ptr() &&
|
630
641
|
bitpacking_metadata_ptr < handle.Ptr() + Storage::BLOCK_SIZE);
|
631
642
|
current_group_offset = 0;
|
632
|
-
current_group = DecodeMeta(
|
643
|
+
current_group = DecodeMeta(reinterpret_cast<bitpacking_metadata_encoded_t *>(bitpacking_metadata_ptr));
|
633
644
|
|
634
645
|
bitpacking_metadata_ptr -= sizeof(bitpacking_metadata_encoded_t);
|
635
646
|
current_group_ptr = GetPtr(current_group);
|
@@ -637,13 +648,13 @@ public:
|
|
637
648
|
// Read first value
|
638
649
|
switch (current_group.mode) {
|
639
650
|
case BitpackingMode::CONSTANT:
|
640
|
-
current_constant = *
|
651
|
+
current_constant = *reinterpret_cast<T *>(current_group_ptr);
|
641
652
|
current_group_ptr += sizeof(T);
|
642
653
|
break;
|
643
654
|
case BitpackingMode::FOR:
|
644
655
|
case BitpackingMode::CONSTANT_DELTA:
|
645
656
|
case BitpackingMode::DELTA_FOR:
|
646
|
-
current_frame_of_reference = *
|
657
|
+
current_frame_of_reference = *reinterpret_cast<T *>(current_group_ptr);
|
647
658
|
current_group_ptr += sizeof(T);
|
648
659
|
break;
|
649
660
|
default:
|
@@ -653,12 +664,12 @@ public:
|
|
653
664
|
// Read second value
|
654
665
|
switch (current_group.mode) {
|
655
666
|
case BitpackingMode::CONSTANT_DELTA:
|
656
|
-
current_constant = *
|
667
|
+
current_constant = *reinterpret_cast<T *>(current_group_ptr);
|
657
668
|
current_group_ptr += sizeof(T);
|
658
669
|
break;
|
659
670
|
case BitpackingMode::FOR:
|
660
671
|
case BitpackingMode::DELTA_FOR:
|
661
|
-
current_width = (bitpacking_width_t)
|
672
|
+
current_width = (bitpacking_width_t)(*reinterpret_cast<T *>(current_group_ptr));
|
662
673
|
current_group_ptr += MaxValue(sizeof(T), sizeof(bitpacking_width_t));
|
663
674
|
break;
|
664
675
|
case BitpackingMode::CONSTANT:
|
@@ -669,7 +680,7 @@ public:
|
|
669
680
|
|
670
681
|
// Read third value
|
671
682
|
if (current_group.mode == BitpackingMode::DELTA_FOR) {
|
672
|
-
current_delta_offset = *
|
683
|
+
current_delta_offset = *reinterpret_cast<T *>(current_group_ptr);
|
673
684
|
current_group_ptr += sizeof(T);
|
674
685
|
}
|
675
686
|
}
|
@@ -694,10 +705,10 @@ public:
|
|
694
705
|
current_group_ptr + decompress_offset, decompress_count,
|
695
706
|
current_width, skip_sign_extension);
|
696
707
|
|
697
|
-
ApplyFrameOfReference<T_S>(
|
698
|
-
skip_count);
|
699
|
-
DeltaDecode<T_S>(
|
700
|
-
(
|
708
|
+
ApplyFrameOfReference<T_S>(reinterpret_cast<T_S *>(decompression_buffer + extra_count),
|
709
|
+
current_frame_of_reference, skip_count);
|
710
|
+
DeltaDecode<T_S>(reinterpret_cast<T_S *>(decompression_buffer + extra_count),
|
711
|
+
static_cast<T_S>(current_delta_offset), skip_count);
|
701
712
|
current_delta_offset = decompression_buffer[extra_count + skip_count - 1];
|
702
713
|
|
703
714
|
current_group_offset += skip_count;
|
@@ -734,10 +745,10 @@ unique_ptr<SegmentScanState> BitpackingInitScan(ColumnSegment &segment) {
|
|
734
745
|
//===--------------------------------------------------------------------===//
|
735
746
|
// Scan base data
|
736
747
|
//===--------------------------------------------------------------------===//
|
737
|
-
template <class T, class T_S = typename
|
748
|
+
template <class T, class T_S = typename MakeSigned<T>::type>
|
738
749
|
void BitpackingScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result,
|
739
750
|
idx_t result_offset) {
|
740
|
-
auto &scan_state =
|
751
|
+
auto &scan_state = static_cast<BitpackingScanState<T> &>(*state.scan_state);
|
741
752
|
|
742
753
|
T *result_data = FlatVector::GetData<T>(result);
|
743
754
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
@@ -772,7 +783,7 @@ void BitpackingScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t
|
|
772
783
|
T *target_ptr = result_data + result_offset + scanned;
|
773
784
|
|
774
785
|
for (idx_t i = 0; i < to_scan; i++) {
|
775
|
-
target_ptr[i] = ((scan_state.current_group_offset + i) * scan_state.current_constant) +
|
786
|
+
target_ptr[i] = (static_cast<T>(scan_state.current_group_offset + i) * scan_state.current_constant) +
|
776
787
|
scan_state.current_frame_of_reference;
|
777
788
|
}
|
778
789
|
|
@@ -808,9 +819,11 @@ void BitpackingScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t
|
|
808
819
|
}
|
809
820
|
|
810
821
|
if (scan_state.current_group.mode == BitpackingMode::DELTA_FOR) {
|
811
|
-
ApplyFrameOfReference<T_S>(
|
812
|
-
|
813
|
-
|
822
|
+
ApplyFrameOfReference<T_S>(reinterpret_cast<T_S *>(current_result_ptr),
|
823
|
+
static_cast<T_S>(scan_state.current_frame_of_reference), to_scan);
|
824
|
+
DeltaDecode<T_S>(reinterpret_cast<T_S *>(current_result_ptr),
|
825
|
+
static_cast<T_S>(scan_state.current_delta_offset), to_scan);
|
826
|
+
scan_state.current_delta_offset = current_result_ptr[to_scan - 1];
|
814
827
|
} else {
|
815
828
|
ApplyFrameOfReference<T>(current_result_ptr, scan_state.current_frame_of_reference, to_scan);
|
816
829
|
}
|
@@ -833,7 +846,7 @@ void BitpackingFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t r
|
|
833
846
|
idx_t result_idx) {
|
834
847
|
BitpackingScanState<T> scan_state(segment);
|
835
848
|
scan_state.Skip(segment, row_id);
|
836
|
-
|
849
|
+
T *result_data = FlatVector::GetData<T>(result);
|
837
850
|
T *current_result_ptr = result_data + result_idx;
|
838
851
|
|
839
852
|
idx_t offset_in_compression_group =
|
@@ -852,8 +865,16 @@ void BitpackingFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t r
|
|
852
865
|
}
|
853
866
|
|
854
867
|
if (scan_state.current_group.mode == BitpackingMode::CONSTANT_DELTA) {
|
855
|
-
|
856
|
-
|
868
|
+
#ifdef DEBUG
|
869
|
+
// overflow check
|
870
|
+
T result;
|
871
|
+
bool multiply = TryMultiplyOperator::Operation(static_cast<T>(scan_state.current_group_offset),
|
872
|
+
scan_state.current_constant, result);
|
873
|
+
bool add = TryAddOperator::Operation(result, scan_state.current_frame_of_reference, result);
|
874
|
+
D_ASSERT(multiply && add);
|
875
|
+
#endif
|
876
|
+
*current_result_ptr = (static_cast<T>(scan_state.current_group_offset) * scan_state.current_constant) +
|
877
|
+
scan_state.current_frame_of_reference;
|
857
878
|
return;
|
858
879
|
}
|
859
880
|
|
@@ -863,7 +884,7 @@ void BitpackingFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t r
|
|
863
884
|
BitpackingPrimitives::UnPackBlock<T>(data_ptr_cast(scan_state.decompression_buffer),
|
864
885
|
decompression_group_start_pointer, scan_state.current_width, skip_sign_extend);
|
865
886
|
|
866
|
-
*current_result_ptr =
|
887
|
+
*current_result_ptr = scan_state.decompression_buffer[offset_in_compression_group];
|
867
888
|
*current_result_ptr += scan_state.current_frame_of_reference;
|
868
889
|
|
869
890
|
if (scan_state.current_group.mode == BitpackingMode::DELTA_FOR) {
|
@@ -872,7 +893,7 @@ void BitpackingFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t r
|
|
872
893
|
}
|
873
894
|
template <class T>
|
874
895
|
void BitpackingSkip(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count) {
|
875
|
-
auto &scan_state =
|
896
|
+
auto &scan_state = static_cast<BitpackingScanState<T> &>(*state.scan_state);
|
876
897
|
scan_state.Skip(segment, skip_count);
|
877
898
|
}
|
878
899
|
|
@@ -907,6 +928,8 @@ CompressionFunction BitpackingFun::GetFunction(PhysicalType type) {
|
|
907
928
|
return GetBitpackingFunction<uint32_t>(type);
|
908
929
|
case PhysicalType::UINT64:
|
909
930
|
return GetBitpackingFunction<uint64_t>(type);
|
931
|
+
case PhysicalType::INT128:
|
932
|
+
return GetBitpackingFunction<hugeint_t>(type);
|
910
933
|
case PhysicalType::LIST:
|
911
934
|
return GetBitpackingFunction<uint64_t, false>(type);
|
912
935
|
default:
|
@@ -926,6 +949,7 @@ bool BitpackingFun::TypeIsSupported(PhysicalType type) {
|
|
926
949
|
case PhysicalType::UINT32:
|
927
950
|
case PhysicalType::UINT64:
|
928
951
|
case PhysicalType::LIST:
|
952
|
+
case PhysicalType::INT128:
|
929
953
|
return true;
|
930
954
|
default:
|
931
955
|
return false;
|