duckdb 0.4.1-dev1225.0 → 0.4.1-dev1254.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.4.1-dev1225.0",
4
+ "version": "0.4.1-dev1254.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -8230,10 +8230,10 @@ SequenceException::SequenceException(const string &msg) : Exception(ExceptionTyp
8230
8230
  InterruptException::InterruptException() : Exception(ExceptionType::INTERRUPT, "Interrupted!") {
8231
8231
  }
8232
8232
 
8233
- FatalException::FatalException(const string &msg) : Exception(ExceptionType::FATAL, msg) {
8233
+ FatalException::FatalException(ExceptionType type, const string &msg) : Exception(type, msg) {
8234
8234
  }
8235
8235
 
8236
- InternalException::InternalException(const string &msg) : Exception(ExceptionType::INTERNAL, msg) {
8236
+ InternalException::InternalException(const string &msg) : FatalException(ExceptionType::INTERNAL, msg) {
8237
8237
  }
8238
8238
 
8239
8239
  InvalidInputException::InvalidInputException(const string &msg) : Exception(ExceptionType::INVALID_INPUT, msg) {
@@ -118897,6 +118897,11 @@ unique_ptr<DataChunk> ClientContext::FetchInternal(ClientContextLock &lock, Exec
118897
118897
  // standard exceptions do not invalidate the current transaction
118898
118898
  result.error = ex.what();
118899
118899
  invalidate_query = false;
118900
+ } catch (FatalException &ex) {
118901
+ // fatal exceptions invalidate the entire database
118902
+ result.error = ex.what();
118903
+ auto &db = DatabaseInstance::GetDatabase(*this);
118904
+ db.Invalidate();
118900
118905
  } catch (std::exception &ex) {
118901
118906
  result.error = ex.what();
118902
118907
  } catch (...) { // LCOV_EXCL_START
@@ -118910,6 +118915,10 @@ unique_ptr<DataChunk> ClientContext::FetchInternal(ClientContextLock &lock, Exec
118910
118915
  void ClientContext::BeginTransactionInternal(ClientContextLock &lock, bool requires_valid_transaction) {
118911
118916
  // check if we are on AutoCommit. In this case we should start a transaction
118912
118917
  D_ASSERT(!active_query);
118918
+ auto &db = DatabaseInstance::GetDatabase(*this);
118919
+ if (db.IsInvalidated()) {
118920
+ throw FatalException("Failed: database has been invalidated!");
118921
+ }
118913
118922
  if (requires_valid_transaction && transaction.HasActiveTransaction() &&
118914
118923
  transaction.ActiveTransaction().IsInvalidated()) {
118915
118924
  throw Exception("Failed: transaction has been invalidated!");
@@ -118958,6 +118967,10 @@ string ClientContext::EndQueryInternal(ClientContextLock &lock, bool success, bo
118958
118967
  ActiveTransaction().Invalidate();
118959
118968
  }
118960
118969
  }
118970
+ } catch (FatalException &ex) {
118971
+ auto &db = DatabaseInstance::GetDatabase(*this);
118972
+ db.Invalidate();
118973
+ error = ex.what();
118961
118974
  } catch (std::exception &ex) {
118962
118975
  error = ex.what();
118963
118976
  } catch (...) { // LCOV_EXCL_START
@@ -119401,7 +119414,17 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
119401
119414
  shared_ptr<PreparedStatementData> &prepared, PendingQueryParameters parameters) {
119402
119415
  unique_ptr<PendingQueryResult> result;
119403
119416
 
119404
- BeginQueryInternal(lock, query);
119417
+ try {
119418
+ BeginQueryInternal(lock, query);
119419
+ } catch (FatalException &ex) {
119420
+ // fatal exceptions invalidate the entire database
119421
+ auto &db = DatabaseInstance::GetDatabase(*this);
119422
+ db.Invalidate();
119423
+ result = make_unique<PendingQueryResult>(ex.what());
119424
+ return result;
119425
+ } catch (std::exception &ex) {
119426
+ return make_unique<PendingQueryResult>(ex.what());
119427
+ }
119405
119428
  // start the profiler
119406
119429
  auto &profiler = QueryProfiler::Get(*this);
119407
119430
  profiler.StartQuery(query, IsExplainAnalyze(statement ? statement.get() : prepared->unbound_statement.get()));
@@ -119425,6 +119448,11 @@ unique_ptr<PendingQueryResult> ClientContext::PendingStatementOrPreparedStatemen
119425
119448
  // standard exceptions do not invalidate the current transaction
119426
119449
  result = make_unique<PendingQueryResult>(ex.what());
119427
119450
  invalidate_query = false;
119451
+ } catch (FatalException &ex) {
119452
+ // fatal exceptions invalidate the entire database
119453
+ auto &db = DatabaseInstance::GetDatabase(*this);
119454
+ db.Invalidate();
119455
+ result = make_unique<PendingQueryResult>(ex.what());
119428
119456
  } catch (std::exception &ex) {
119429
119457
  // other types of exceptions do invalidate the current transaction
119430
119458
  result = make_unique<PendingQueryResult>(ex.what());
@@ -119902,6 +119930,10 @@ void ClientContext::RunFunctionInTransactionInternal(ClientContextLock &lock, co
119902
119930
  transaction.Rollback();
119903
119931
  }
119904
119932
  throw;
119933
+ } catch (FatalException &ex) {
119934
+ auto &db = DatabaseInstance::GetDatabase(*this);
119935
+ db.Invalidate();
119936
+ throw;
119905
119937
  } catch (std::exception &ex) {
119906
119938
  if (require_new_transaction) {
119907
119939
  transaction.Rollback();
@@ -121405,6 +121437,13 @@ string ClientConfig::ExtractTimezoneFromConfig(ClientConfig &config) {
121405
121437
  }
121406
121438
  }
121407
121439
 
121440
+ void DatabaseInstance::Invalidate() {
121441
+ this->is_invalidated = true;
121442
+ }
121443
+ bool DatabaseInstance::IsInvalidated() {
121444
+ return this->is_invalidated;
121445
+ }
121446
+
121408
121447
  } // namespace duckdb
121409
121448
 
121410
121449
 
@@ -183064,6 +183103,13 @@ public:
183064
183103
  return FindMinimumBitWidth<T, BYTE_ALIGNED>(values, count);
183065
183104
  }
183066
183105
 
183106
+ // Calculates the minimum required number of bits per value that can store all values,
183107
+ // given a predetermined minimum and maximum value of the buffer
183108
+ template <class T>
183109
+ inline static bitpacking_width_t MinimumBitWidth(T minimum, T maximum) {
183110
+ return FindMinimumBitWidth<T, BYTE_ALIGNED>(minimum, maximum);
183111
+ }
183112
+
183067
183113
  template <class T>
183068
183114
  inline static idx_t GetRequiredSize(idx_t count, bitpacking_width_t width) {
183069
183115
  count = RoundUpToAlgorithmGroupSize(count);
@@ -183152,6 +183198,18 @@ private:
183152
183198
  }
183153
183199
  }
183154
183200
 
183201
+ // Sign bit extension
183202
+ template <class T, class T_U = typename std::make_unsigned<T>::type>
183203
+ static void SignExtend(data_ptr_t dst, bitpacking_width_t width) {
183204
+ T const mask = ((T_U)1) << (width - 1);
183205
+ for (idx_t i = 0; i < BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE; ++i) {
183206
+ T value = Load<T>(dst + i * sizeof(T));
183207
+ value = value & ((((T_U)1) << width) - ((T_U)1));
183208
+ T result = (value ^ mask) - mask;
183209
+ Store(result, dst + i * sizeof(T));
183210
+ }
183211
+ }
183212
+
183155
183213
  template <class T>
183156
183214
  static void UnPackGroup(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width,
183157
183215
  bool skip_sign_extension = false) {
@@ -183175,33 +183233,14 @@ private:
183175
183233
  // Prevent compression at widths that are ineffective
183176
183234
  template <class T>
183177
183235
  static bitpacking_width_t GetEffectiveWidth(bitpacking_width_t width) {
183178
- if (width > 56) {
183179
- return 64;
183180
- }
183181
-
183182
- if (width > 28 && (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value)) {
183183
- return 32;
183184
- }
183185
-
183186
- else if (width > 14 && (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value)) {
183187
- return 16;
183236
+ auto bits_of_type = sizeof(T) * 8;
183237
+ auto type_size = sizeof(T);
183238
+ if (width + type_size > bits_of_type) {
183239
+ return bits_of_type;
183188
183240
  }
183189
-
183190
183241
  return width;
183191
183242
  }
183192
183243
 
183193
- // Sign bit extension
183194
- template <class T, class T_U = typename std::make_unsigned<T>::type>
183195
- static void SignExtend(data_ptr_t dst, bitpacking_width_t width) {
183196
- T const mask = ((T_U)1) << (width - 1);
183197
- for (idx_t i = 0; i < BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE; ++i) {
183198
- T value = Load<T>(dst + i * sizeof(T));
183199
- value = value & ((((T_U)1) << width) - ((T_U)1));
183200
- T result = (value ^ mask) - mask;
183201
- Store(result, dst + i * sizeof(T));
183202
- }
183203
- }
183204
-
183205
183244
  template <class T>
183206
183245
  static void PackGroup(data_ptr_t dst, T *values, bitpacking_width_t width) {
183207
183246
  if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) {
@@ -183465,17 +183504,19 @@ private:
183465
183504
 
183466
183505
 
183467
183506
 
183507
+
183468
183508
  #include <functional>
183469
183509
 
183470
183510
  namespace duckdb {
183471
183511
 
183472
183512
  // Note that optimizations in scanning only work if this value is equal to STANDARD_VECTOR_SIZE, however we keep them
183473
183513
  // separated to prevent the code from breaking on lower vector sizes
183474
- static constexpr const idx_t BITPACKING_WIDTH_GROUP_SIZE = 1024;
183514
+ static constexpr const idx_t BITPACKING_METADATA_GROUP_SIZE = 1024;
183475
183515
 
183476
183516
  struct EmptyBitpackingWriter {
183477
183517
  template <class T>
183478
- static void Operation(T *values, bool *validity, bitpacking_width_t width, idx_t count, void *data_ptr) {
183518
+ static void Operation(T *values, bool *validity, bitpacking_width_t width, T frame_of_reference, idx_t count,
183519
+ void *data_ptr) {
183479
183520
  }
183480
183521
  };
183481
183522
 
@@ -183483,39 +183524,95 @@ template <class T>
183483
183524
  struct BitpackingState {
183484
183525
  public:
183485
183526
  BitpackingState() : compression_buffer_idx(0), total_size(0), data_ptr(nullptr) {
183527
+ ResetMinMax();
183486
183528
  }
183487
183529
 
183488
- T compression_buffer[BITPACKING_WIDTH_GROUP_SIZE];
183489
- bool compression_buffer_validity[BITPACKING_WIDTH_GROUP_SIZE];
183530
+ T compression_buffer[BITPACKING_METADATA_GROUP_SIZE];
183531
+ bool compression_buffer_validity[BITPACKING_METADATA_GROUP_SIZE];
183490
183532
  idx_t compression_buffer_idx;
183491
183533
  idx_t total_size;
183492
183534
  void *data_ptr;
183493
183535
 
183536
+ bool min_max_set;
183537
+ T minimum;
183538
+ T maximum;
183539
+
183494
183540
  public:
183495
- template <class OP>
183541
+ void SubtractFrameOfReference(const T &frame_of_reference) {
183542
+ for (idx_t i = 0; i < compression_buffer_idx; i++) {
183543
+ compression_buffer[i] -= frame_of_reference;
183544
+ }
183545
+ }
183546
+
183547
+ void ResetMinMax() {
183548
+ min_max_set = false;
183549
+ //! We set these to 0, in case all values are NULL, in which case the min and max will never be set.
183550
+ minimum = 0;
183551
+ maximum = 0;
183552
+ }
183553
+
183554
+ bool TryUpdateMinMax(T value) {
183555
+ bool updated = false;
183556
+ if (!min_max_set || value < minimum) {
183557
+ minimum = value;
183558
+ updated = true;
183559
+ }
183560
+ if (!min_max_set || value > maximum) {
183561
+ maximum = value;
183562
+ updated = true;
183563
+ }
183564
+ min_max_set = min_max_set || updated;
183565
+ //! Only when either of the values are updated, do we need to test the overflow
183566
+ if (updated) {
183567
+ T ignore;
183568
+ return TrySubtractOperator::Operation(maximum, minimum, ignore);
183569
+ }
183570
+ return true;
183571
+ }
183572
+
183573
+ T GetFrameOfReference() {
183574
+ return minimum;
183575
+ }
183576
+ T Maximum() {
183577
+ return maximum;
183578
+ }
183579
+
183580
+ template <class OP, class T_U = typename std::make_unsigned<T>::type>
183496
183581
  void Flush() {
183497
- bitpacking_width_t width = BitpackingPrimitives::MinimumBitWidth<T>(compression_buffer, compression_buffer_idx);
183498
- OP::Operation(compression_buffer, compression_buffer_validity, width, compression_buffer_idx, data_ptr);
183499
- total_size += (BITPACKING_WIDTH_GROUP_SIZE * width) / 8 + sizeof(bitpacking_width_t);
183582
+ T frame_of_reference = GetFrameOfReference();
183583
+ SubtractFrameOfReference(frame_of_reference);
183584
+
183585
+ //! Because of FOR, we can guarantee that all non-NULL values are non-negative (the minimum maps to 0)
183586
+ T_U adjusted_maximum = T_U(Maximum() - frame_of_reference);
183587
+
183588
+ bitpacking_width_t width = BitpackingPrimitives::MinimumBitWidth<T_U>((T_U)0, adjusted_maximum);
183589
+ OP::template Operation<T>(compression_buffer, compression_buffer_validity, width, frame_of_reference,
183590
+ compression_buffer_idx, data_ptr);
183591
+ total_size += (BITPACKING_METADATA_GROUP_SIZE * width) / 8 + sizeof(bitpacking_width_t) + sizeof(T);
183500
183592
  compression_buffer_idx = 0;
183593
+ ResetMinMax();
183501
183594
  }
183502
183595
 
183503
183596
  template <class OP = EmptyBitpackingWriter>
183504
- void Update(T *data, ValidityMask &validity, idx_t idx) {
183597
+ bool Update(T *data, ValidityMask &validity, idx_t idx) {
183505
183598
 
183506
183599
  if (validity.RowIsValid(idx)) {
183507
183600
  compression_buffer_validity[compression_buffer_idx] = true;
183508
183601
  compression_buffer[compression_buffer_idx++] = data[idx];
183602
+ if (!TryUpdateMinMax(data[idx])) {
183603
+ return false;
183604
+ }
183509
183605
  } else {
183510
183606
  // We write zero for easy bitwidth analysis of the compression buffer later
183511
183607
  compression_buffer_validity[compression_buffer_idx] = false;
183512
183608
  compression_buffer[compression_buffer_idx++] = 0;
183513
183609
  }
183514
183610
 
183515
- if (compression_buffer_idx == BITPACKING_WIDTH_GROUP_SIZE) {
183611
+ if (compression_buffer_idx == BITPACKING_METADATA_GROUP_SIZE) {
183516
183612
  // Calculate bitpacking width;
183517
183613
  Flush<OP>();
183518
183614
  }
183615
+ return true;
183519
183616
  }
183520
183617
  };
183521
183618
 
@@ -183541,9 +183638,10 @@ bool BitpackingAnalyze(AnalyzeState &state, Vector &input, idx_t count) {
183541
183638
  auto data = (T *)vdata.data;
183542
183639
  for (idx_t i = 0; i < count; i++) {
183543
183640
  auto idx = vdata.sel->get_index(i);
183544
- analyze_state.state.template Update<EmptyBitpackingWriter>(data, vdata.validity, idx);
183641
+ if (!analyze_state.state.template Update<EmptyBitpackingWriter>(data, vdata.validity, idx)) {
183642
+ return false;
183643
+ }
183545
183644
  }
183546
-
183547
183645
  return true;
183548
183646
  }
183549
183647
 
@@ -183577,19 +183675,25 @@ public:
183577
183675
 
183578
183676
  // Ptr to next free spot in segment;
183579
183677
  data_ptr_t data_ptr;
183580
- // Ptr to next free spot for storing bitwidths (growing downwards).
183581
- data_ptr_t width_ptr;
183678
+ // Ptr to next free spot for storing bitwidths and frame-of-references (growing downwards).
183679
+ data_ptr_t metadata_ptr;
183582
183680
 
183583
183681
  BitpackingState<T> state;
183584
183682
 
183585
183683
  public:
183586
183684
  struct BitpackingWriter {
183685
+
183587
183686
  template <class VALUE_TYPE>
183588
- static void Operation(VALUE_TYPE *values, bool *validity, bitpacking_width_t width, idx_t count,
183589
- void *data_ptr) {
183687
+ static void Operation(VALUE_TYPE *values, bool *validity, bitpacking_width_t width,
183688
+ VALUE_TYPE frame_of_reference, idx_t count, void *data_ptr) {
183590
183689
  auto state = (BitpackingCompressState<T> *)data_ptr;
183690
+ auto total_bits_needed = (width * BITPACKING_METADATA_GROUP_SIZE);
183691
+ D_ASSERT(total_bits_needed % 8 == 0);
183692
+ auto total_bytes_needed = total_bits_needed / 8;
183693
+ total_bytes_needed += sizeof(bitpacking_width_t);
183694
+ total_bytes_needed += sizeof(VALUE_TYPE);
183591
183695
 
183592
- if (state->RemainingSize() < (width * BITPACKING_WIDTH_GROUP_SIZE) / 8 + sizeof(bitpacking_width_t)) {
183696
+ if (state->RemainingSize() < total_bytes_needed) {
183593
183697
  // Segment is full
183594
183698
  auto row_start = state->current_segment->start + state->current_segment->count;
183595
183699
  state->FlushSegment();
@@ -183598,17 +183702,17 @@ public:
183598
183702
 
183599
183703
  for (idx_t i = 0; i < count; i++) {
183600
183704
  if (validity[i]) {
183601
- NumericStatistics::Update<T>(state->current_segment->stats, values[i]);
183705
+ NumericStatistics::Update<T>(state->current_segment->stats, values[i] + frame_of_reference);
183602
183706
  }
183603
183707
  }
183604
183708
 
183605
- state->WriteValues(values, width, count);
183709
+ state->WriteValues(values, width, frame_of_reference, count);
183606
183710
  }
183607
183711
  };
183608
183712
 
183609
- // Space remaining between the width_ptr growing down and data ptr growing up
183713
+ // Space remaining between the metadata_ptr growing down and data ptr growing up
183610
183714
  idx_t RemainingSize() {
183611
- return width_ptr - data_ptr;
183715
+ return metadata_ptr - data_ptr;
183612
183716
  }
183613
183717
 
183614
183718
  void CreateEmptySegment(idx_t row_start) {
@@ -183621,7 +183725,8 @@ public:
183621
183725
  handle = buffer_manager.Pin(current_segment->block);
183622
183726
 
183623
183727
  data_ptr = handle.Ptr() + current_segment->GetBlockOffset() + BitpackingPrimitives::BITPACKING_HEADER_SIZE;
183624
- width_ptr = handle.Ptr() + current_segment->GetBlockOffset() + Storage::BLOCK_SIZE - sizeof(bitpacking_width_t);
183728
+ metadata_ptr =
183729
+ handle.Ptr() + current_segment->GetBlockOffset() + Storage::BLOCK_SIZE - sizeof(bitpacking_width_t);
183625
183730
  }
183626
183731
 
183627
183732
  void Append(UnifiedVectorFormat &vdata, idx_t count) {
@@ -183634,13 +183739,15 @@ public:
183634
183739
  }
183635
183740
  }
183636
183741
 
183637
- void WriteValues(T *values, bitpacking_width_t width, idx_t count) {
183638
- // TODO we can optimize this by stopping early if count < BITPACKING_WIDTH_GROUP_SIZE
183742
+ void WriteValues(T *values, bitpacking_width_t width, T frame_of_reference, idx_t count) {
183743
+ // TODO we can optimize this by stopping early if count < BITPACKING_METADATA_GROUP_SIZE
183639
183744
  BitpackingPrimitives::PackBuffer<T, false>(data_ptr, values, count, width);
183640
- data_ptr += (BITPACKING_WIDTH_GROUP_SIZE * width) / 8;
183745
+ data_ptr += (BITPACKING_METADATA_GROUP_SIZE * width) / 8;
183641
183746
 
183642
- Store<bitpacking_width_t>(width, width_ptr);
183643
- width_ptr -= sizeof(bitpacking_width_t);
183747
+ Store<bitpacking_width_t>(width, metadata_ptr);
183748
+ metadata_ptr -= sizeof(T);
183749
+ Store<T>(frame_of_reference, metadata_ptr);
183750
+ metadata_ptr -= sizeof(bitpacking_width_t);
183644
183751
 
183645
183752
  current_segment->count += count;
183646
183753
  }
@@ -183649,14 +183756,14 @@ public:
183649
183756
  auto &state = checkpointer.GetCheckpointState();
183650
183757
  auto dataptr = handle.Ptr();
183651
183758
 
183652
- // Compact the segment by moving the widths next to the data.
183653
- idx_t minimal_widths_offset = AlignValue(data_ptr - dataptr);
183654
- idx_t widths_size = dataptr + Storage::BLOCK_SIZE - width_ptr - 1;
183655
- idx_t total_segment_size = minimal_widths_offset + widths_size;
183656
- memmove(dataptr + minimal_widths_offset, width_ptr + 1, widths_size);
183759
+ // Compact the segment by moving the metadata next to the data.
183760
+ idx_t metadata_offset = AlignValue(data_ptr - dataptr);
183761
+ idx_t metadata_size = dataptr + Storage::BLOCK_SIZE - metadata_ptr - 1;
183762
+ idx_t total_segment_size = metadata_offset + metadata_size;
183763
+ memmove(dataptr + metadata_offset, metadata_ptr + 1, metadata_size);
183657
183764
 
183658
- // Store the offset of the first width (which is at the highest address).
183659
- Store<idx_t>(minimal_widths_offset + widths_size - 1, dataptr);
183765
+ // Store the offset of the metadata of the first group (which is at the highest address).
183766
+ Store<idx_t>(metadata_offset + metadata_size - 1, dataptr);
183660
183767
  handle.Destroy();
183661
183768
 
183662
183769
  state.FlushSegment(move(current_segment), total_segment_size);
@@ -183699,14 +183806,14 @@ public:
183699
183806
  auto &buffer_manager = BufferManager::GetBufferManager(segment.db);
183700
183807
  handle = buffer_manager.Pin(segment.block);
183701
183808
  auto dataptr = handle.Ptr();
183702
- current_width_group_ptr = dataptr + segment.GetBlockOffset() + BitpackingPrimitives::BITPACKING_HEADER_SIZE;
183809
+ current_metadata_group_ptr = dataptr + segment.GetBlockOffset() + BitpackingPrimitives::BITPACKING_HEADER_SIZE;
183703
183810
 
183704
183811
  // load offset to bitpacking metadata pointer
183705
- auto bitpacking_widths_offset = Load<idx_t>(dataptr + segment.GetBlockOffset());
183706
- bitpacking_width_ptr = dataptr + segment.GetBlockOffset() + bitpacking_widths_offset;
183812
+ auto bitpacking_metadata_offset = Load<idx_t>(dataptr + segment.GetBlockOffset());
183813
+ bitpacking_metadata_ptr = dataptr + segment.GetBlockOffset() + bitpacking_metadata_offset;
183707
183814
 
183708
- // load the bitwidth of the first vector
183709
- LoadCurrentBitWidth();
183815
+ // load the metadata of the first vector
183816
+ LoadCurrentMetaData();
183710
183817
  }
183711
183818
 
183712
183819
  BufferHandle handle;
@@ -183715,32 +183822,37 @@ public:
183715
183822
  T decompression_buffer[BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE];
183716
183823
 
183717
183824
  idx_t position_in_group = 0;
183718
- data_ptr_t current_width_group_ptr;
183719
- data_ptr_t bitpacking_width_ptr;
183825
+ data_ptr_t current_metadata_group_ptr;
183826
+ data_ptr_t bitpacking_metadata_ptr;
183720
183827
  bitpacking_width_t current_width;
183828
+ T current_frame_of_reference;
183721
183829
 
183722
183830
  public:
183723
- void LoadCurrentBitWidth() {
183724
- D_ASSERT(bitpacking_width_ptr > handle.Ptr() && bitpacking_width_ptr < handle.Ptr() + Storage::BLOCK_SIZE);
183725
- current_width = Load<bitpacking_width_t>(bitpacking_width_ptr);
183831
+ //! Loads the current group header, and sets pointer to next header
183832
+ void LoadCurrentMetaData() {
183833
+ D_ASSERT(bitpacking_metadata_ptr > handle.Ptr() &&
183834
+ bitpacking_metadata_ptr < handle.Ptr() + Storage::BLOCK_SIZE);
183835
+ current_width = Load<bitpacking_width_t>(bitpacking_metadata_ptr);
183836
+ bitpacking_metadata_ptr -= sizeof(T);
183837
+ current_frame_of_reference = Load<T>(bitpacking_metadata_ptr);
183838
+ bitpacking_metadata_ptr -= sizeof(bitpacking_width_t);
183726
183839
  LoadDecompressFunction();
183727
183840
  }
183728
183841
 
183729
183842
  void Skip(ColumnSegment &segment, idx_t skip_count) {
183730
183843
  while (skip_count > 0) {
183731
- if (position_in_group + skip_count < BITPACKING_WIDTH_GROUP_SIZE) {
183844
+ if (position_in_group + skip_count < BITPACKING_METADATA_GROUP_SIZE) {
183732
183845
  // We're not leaving this bitpacking group, we can perform all skips.
183733
183846
  position_in_group += skip_count;
183734
183847
  break;
183735
183848
  } else {
183736
183849
  // The skip crosses the current bitpacking group, we skip the remainder of this group.
183737
- auto skipping = BITPACKING_WIDTH_GROUP_SIZE - position_in_group;
183850
+ auto skipping = BITPACKING_METADATA_GROUP_SIZE - position_in_group;
183738
183851
  position_in_group = 0;
183739
- current_width_group_ptr += (current_width * BITPACKING_WIDTH_GROUP_SIZE) / 8;
183852
+ current_metadata_group_ptr += (current_width * BITPACKING_METADATA_GROUP_SIZE) / 8;
183740
183853
 
183741
- // Update width pointer and load new width
183742
- bitpacking_width_ptr -= sizeof(bitpacking_width_t);
183743
- LoadCurrentBitWidth();
183854
+ // Load the metadata (width and frame of reference) of the next group
183855
+ LoadCurrentMetaData();
183744
183856
 
183745
183857
  skip_count -= skipping;
183746
183858
  }
@@ -183758,6 +183870,16 @@ unique_ptr<SegmentScanState> BitpackingInitScan(ColumnSegment &segment) {
183758
183870
  return move(result);
183759
183871
  }
183760
183872
 
183873
+ template <class T>
183874
+ static void ApplyFrameOfReference(T *dst, T frame_of_reference, idx_t size) {
183875
+ if (!frame_of_reference) {
183876
+ return;
183877
+ }
183878
+ for (idx_t i = 0; i < size; i++) {
183879
+ dst[i] += frame_of_reference;
183880
+ }
183881
+ }
183882
+
183761
183883
  //===--------------------------------------------------------------------===//
183762
183884
  // Scan base data
183763
183885
  //===--------------------------------------------------------------------===//
@@ -183770,31 +183892,28 @@ void BitpackingScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t
183770
183892
  result.SetVectorType(VectorType::FLAT_VECTOR);
183771
183893
 
183772
183894
  // Fast path for when no compression was used, we can do a single memcopy
183773
- if (STANDARD_VECTOR_SIZE == BITPACKING_WIDTH_GROUP_SIZE) {
183774
- if (scan_state.current_width == sizeof(T) * 8 && scan_count <= BITPACKING_WIDTH_GROUP_SIZE &&
183775
- scan_state.position_in_group == 0) {
183776
-
183777
- memcpy(result_data + result_offset, scan_state.current_width_group_ptr, scan_count * sizeof(T));
183778
- scan_state.current_width_group_ptr += scan_count * sizeof(T);
183779
- scan_state.bitpacking_width_ptr -= sizeof(bitpacking_width_t);
183780
- scan_state.LoadCurrentBitWidth();
183895
+ if (STANDARD_VECTOR_SIZE == BITPACKING_METADATA_GROUP_SIZE) {
183896
+ if (scan_state.current_frame_of_reference == 0 && scan_state.current_width == sizeof(T) * 8 &&
183897
+ scan_count <= BITPACKING_METADATA_GROUP_SIZE && scan_state.position_in_group == 0) {
183898
+
183899
+ memcpy(result_data + result_offset, scan_state.current_metadata_group_ptr, scan_count * sizeof(T));
183900
+ scan_state.current_metadata_group_ptr += scan_count * sizeof(T);
183901
+ scan_state.LoadCurrentMetaData();
183781
183902
  return;
183782
183903
  }
183783
183904
  }
183784
183905
 
183785
- // Determine if we can skip sign extension during compression
183786
- auto &nstats = (NumericStatistics &)*segment.stats.statistics;
183787
- bool skip_sign_extend = std::is_signed<T>::value && nstats.min >= 0;
183906
+ //! Because FOR offsets all our values to be 0 or above, we can always skip sign extension here
183907
+ bool skip_sign_extend = true;
183788
183908
 
183789
183909
  idx_t scanned = 0;
183790
183910
 
183791
183911
  while (scanned < scan_count) {
183792
- // Exhausted this width group, move pointers to next group and load bitwidth for next group.
183793
- if (scan_state.position_in_group >= BITPACKING_WIDTH_GROUP_SIZE) {
183912
+ // Exhausted this metadata group, move pointers to next group and load metadata for next group.
183913
+ if (scan_state.position_in_group >= BITPACKING_METADATA_GROUP_SIZE) {
183794
183914
  scan_state.position_in_group = 0;
183795
- scan_state.bitpacking_width_ptr -= sizeof(bitpacking_width_t);
183796
- scan_state.current_width_group_ptr += (scan_state.current_width * BITPACKING_WIDTH_GROUP_SIZE) / 8;
183797
- scan_state.LoadCurrentBitWidth();
183915
+ scan_state.current_metadata_group_ptr += (scan_state.current_width * BITPACKING_METADATA_GROUP_SIZE) / 8;
183916
+ scan_state.LoadCurrentMetaData();
183798
183917
  }
183799
183918
 
183800
183919
  idx_t offset_in_compression_group =
@@ -183805,7 +183924,7 @@ void BitpackingScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t
183805
183924
 
183806
183925
  // Calculate start of compression algorithm group
183807
183926
  data_ptr_t current_position_ptr =
183808
- scan_state.current_width_group_ptr + scan_state.position_in_group * scan_state.current_width / 8;
183927
+ scan_state.current_metadata_group_ptr + scan_state.position_in_group * scan_state.current_width / 8;
183809
183928
  data_ptr_t decompression_group_start_pointer =
183810
183929
  current_position_ptr - offset_in_compression_group * scan_state.current_width / 8;
183811
183930
 
@@ -183824,7 +183943,7 @@ void BitpackingScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t
183824
183943
  memcpy(current_result_ptr, scan_state.decompression_buffer + offset_in_compression_group,
183825
183944
  to_scan * sizeof(T));
183826
183945
  }
183827
-
183946
+ ApplyFrameOfReference((T *)current_result_ptr, scan_state.current_frame_of_reference, to_scan);
183828
183947
  scanned += to_scan;
183829
183948
  scan_state.position_in_group += to_scan;
183830
183949
  }
@@ -183851,16 +183970,18 @@ void BitpackingFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t r
183851
183970
  scan_state.position_in_group % BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE;
183852
183971
 
183853
183972
  data_ptr_t decompression_group_start_pointer =
183854
- scan_state.current_width_group_ptr +
183973
+ scan_state.current_metadata_group_ptr +
183855
183974
  (scan_state.position_in_group - offset_in_compression_group) * scan_state.current_width / 8;
183856
183975
 
183857
- auto &nstats = (NumericStatistics &)*segment.stats.statistics;
183858
- bool skip_sign_extend = std::is_signed<T>::value && nstats.min >= 0;
183976
+ //! Because FOR offsets all our values to be 0 or above, we can always skip sign extension here
183977
+ bool skip_sign_extend = true;
183859
183978
 
183860
183979
  scan_state.decompress_function((data_ptr_t)scan_state.decompression_buffer, decompression_group_start_pointer,
183861
183980
  scan_state.current_width, skip_sign_extend);
183862
183981
 
183863
183982
  *current_result_ptr = *(T *)(scan_state.decompression_buffer + offset_in_compression_group);
183983
+ //! Apply FOR to result
183984
+ *current_result_ptr += scan_state.current_frame_of_reference;
183864
183985
  }
183865
183986
  template <class T>
183866
183987
  void BitpackingSkip(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count) {
@@ -190334,7 +190455,7 @@ string ValidityStatistics::ToString() const {
190334
190455
 
190335
190456
  namespace duckdb {
190336
190457
 
190337
- const uint64_t VERSION_NUMBER = 37;
190458
+ const uint64_t VERSION_NUMBER = 38;
190338
190459
 
190339
190460
  } // namespace duckdb
190340
190461
 
@@ -191789,7 +191910,8 @@ void ColumnDataCheckpointer::ScanSegments(const std::function<void(Vector &, idx
191789
191910
  }
191790
191911
  }
191791
191912
 
191792
- void ForceCompression(vector<CompressionFunction *> &compression_functions, CompressionType compression_type) {
191913
+ CompressionType ForceCompression(vector<CompressionFunction *> &compression_functions,
191914
+ CompressionType compression_type) {
191793
191915
  // One of the force_compression flags has been set
191794
191916
  // check if this compression method is available
191795
191917
  bool found = false;
@@ -191802,25 +191924,31 @@ void ForceCompression(vector<CompressionFunction *> &compression_functions, Comp
191802
191924
  if (found) {
191803
191925
  // the force_compression method is available
191804
191926
  // clear all other compression methods
191927
+ // except the uncompressed method, so we can fall back on that
191805
191928
  for (idx_t i = 0; i < compression_functions.size(); i++) {
191929
+ if (compression_functions[i]->type == CompressionType::COMPRESSION_UNCOMPRESSED) {
191930
+ continue;
191931
+ }
191806
191932
  if (compression_functions[i]->type != compression_type) {
191807
191933
  compression_functions[i] = nullptr;
191808
191934
  }
191809
191935
  }
191810
191936
  }
191937
+ return found ? compression_type : CompressionType::COMPRESSION_AUTO;
191811
191938
  }
191812
191939
 
191813
191940
  unique_ptr<AnalyzeState> ColumnDataCheckpointer::DetectBestCompressionMethod(idx_t &compression_idx) {
191814
191941
  D_ASSERT(!compression_functions.empty());
191815
191942
  auto &config = DBConfig::GetConfig(GetDatabase());
191943
+ CompressionType forced_method = CompressionType::COMPRESSION_AUTO;
191816
191944
 
191817
191945
  auto compression_type = checkpoint_info.compression_type;
191818
191946
  if (compression_type != CompressionType::COMPRESSION_AUTO) {
191819
- ForceCompression(compression_functions, compression_type);
191947
+ forced_method = ForceCompression(compression_functions, compression_type);
191820
191948
  }
191821
191949
  if (compression_type == CompressionType::COMPRESSION_AUTO &&
191822
191950
  config.options.force_compression != CompressionType::COMPRESSION_AUTO) {
191823
- ForceCompression(compression_functions, config.options.force_compression);
191951
+ forced_method = ForceCompression(compression_functions, config.options.force_compression);
191824
191952
  }
191825
191953
  // set up the analyze states for each compression method
191826
191954
  vector<unique_ptr<AnalyzeState>> analyze_states;
@@ -191858,12 +191986,18 @@ unique_ptr<AnalyzeState> ColumnDataCheckpointer::DetectBestCompressionMethod(idx
191858
191986
  if (!compression_functions[i]) {
191859
191987
  continue;
191860
191988
  }
191989
+ //! Check if the method type is the forced method (if forced is used)
191990
+ bool forced_method_found = compression_functions[i]->type == forced_method;
191861
191991
  auto score = compression_functions[i]->final_analyze(*analyze_states[i]);
191862
- if (score < best_score) {
191992
+ if (score < best_score || forced_method_found) {
191863
191993
  compression_idx = i;
191864
191994
  best_score = score;
191865
191995
  state = move(analyze_states[i]);
191866
191996
  }
191997
+ //! If we have found the forced method, we're done
191998
+ if (forced_method_found) {
191999
+ break;
192000
+ }
191867
192001
  }
191868
192002
  return state;
191869
192003
  }
@@ -191892,7 +192026,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
191892
192026
  auto analyze_state = DetectBestCompressionMethod(compression_idx);
191893
192027
 
191894
192028
  if (!analyze_state) {
191895
- throw InternalException("No suitable compression/storage method found to store column");
192029
+ throw FatalException("No suitable compression/storage method found to store column");
191896
192030
  }
191897
192031
 
191898
192032
  // now that we have analyzed the compression functions we can start writing to disk