duckdb 0.5.2-dev2164.0 → 0.5.2-dev2189.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -1767,15 +1767,21 @@ private:
1767
1767
 
1768
1768
  namespace duckdb {
1769
1769
  class Prefix {
1770
+ static constexpr idx_t PREFIX_INLINE_BYTES = 8;
1771
+
1770
1772
  public:
1771
1773
  Prefix();
1772
1774
  // Prefix created from key starting on `depth`
1773
1775
  Prefix(Key &key, uint32_t depth, uint32_t size);
1774
1776
  // Prefix created from other prefix up to size
1775
1777
  Prefix(Prefix &other_prefix, uint32_t size);
1778
+ ~Prefix();
1776
1779
 
1777
1780
  // Returns the Prefix's size
1778
1781
  uint32_t Size() const;
1782
+ //! Return a pointer to the prefix data
1783
+ uint8_t *GetPrefixData();
1784
+ const uint8_t *GetPrefixData() const;
1779
1785
 
1780
1786
  // Subscript operator
1781
1787
  uint8_t &operator[](idx_t idx);
@@ -1803,8 +1809,17 @@ public:
1803
1809
  uint32_t MismatchPosition(Prefix &other);
1804
1810
 
1805
1811
  private:
1806
- unique_ptr<uint8_t[]> prefix;
1807
1812
  uint32_t size;
1813
+ union {
1814
+ uint8_t *ptr;
1815
+ uint8_t inlined[8];
1816
+ } value;
1817
+
1818
+ private:
1819
+ bool IsInlined() const;
1820
+ uint8_t *AllocatePrefix(uint32_t size);
1821
+ void Overwrite(uint32_t new_size, uint8_t *data);
1822
+ void Destroy();
1808
1823
  };
1809
1824
 
1810
1825
  } // namespace duckdb
@@ -1813,6 +1828,7 @@ private:
1813
1828
 
1814
1829
 
1815
1830
 
1831
+
1816
1832
  namespace duckdb {
1817
1833
  enum class NodeType : uint8_t { NLeaf = 0, N4 = 1, N16 = 2, N48 = 3, N256 = 4 };
1818
1834
  class ART;
@@ -1867,6 +1883,7 @@ public:
1867
1883
  //! Compressed path (prefix)
1868
1884
  Prefix prefix;
1869
1885
 
1886
+ static void Delete(Node *node);
1870
1887
  //! Get the position of a child corresponding exactly to the specific byte, returns DConstants::INVALID_INDEX if not
1871
1888
  //! exists
1872
1889
  virtual idx_t GetChildPos(uint8_t k) {
@@ -1928,16 +1945,21 @@ namespace duckdb {
1928
1945
  class Leaf : public Node {
1929
1946
  public:
1930
1947
  Leaf(Key &value, uint32_t depth, row_t row_id);
1931
- Leaf(Key &value, uint32_t depth, unique_ptr<row_t[]> row_ids, idx_t num_elements);
1932
- Leaf(unique_ptr<row_t[]> row_ids, idx_t num_elements, Prefix &prefix);
1948
+ Leaf(Key &value, uint32_t depth, row_t *row_ids, idx_t num_elements);
1949
+ Leaf(row_t *row_ids, idx_t num_elements, Prefix &prefix);
1950
+ Leaf(row_t row_id, Prefix &prefix);
1951
+ ~Leaf();
1933
1952
 
1934
- idx_t capacity;
1935
-
1936
- row_t GetRowId(idx_t index) {
1937
- return row_ids[index];
1938
- }
1953
+ row_t GetRowId(idx_t index);
1954
+ idx_t GetCapacity() const;
1955
+ bool IsInlined() const;
1956
+ row_t *GetRowIds();
1939
1957
 
1940
1958
  public:
1959
+ static Leaf *New(Key &value, uint32_t depth, row_t row_id);
1960
+ static Leaf *New(Key &value, uint32_t depth, row_t *row_ids, idx_t num_elements);
1961
+ static Leaf *New(row_t *row_ids, idx_t num_elements, Prefix &prefix);
1962
+ static Leaf *New(row_t row_id, Prefix &prefix);
1941
1963
  //! Insert a row_id into a leaf
1942
1964
  void Insert(row_t row_id);
1943
1965
  //! Remove a row_id from a leaf
@@ -1954,7 +1976,13 @@ public:
1954
1976
  static Leaf *Deserialize(duckdb::MetaBlockReader &reader);
1955
1977
 
1956
1978
  private:
1957
- unique_ptr<row_t[]> row_ids;
1979
+ union {
1980
+ row_t inlined;
1981
+ row_t *ptr;
1982
+ } rowids;
1983
+
1984
+ private:
1985
+ row_t *Resize(row_t *current_row_ids, uint32_t current_count, idx_t new_capacity);
1958
1986
  };
1959
1987
 
1960
1988
  } // namespace duckdb
@@ -2043,6 +2071,7 @@ private:
2043
2071
 
2044
2072
 
2045
2073
 
2074
+
2046
2075
  namespace duckdb {
2047
2076
 
2048
2077
  class ART;
@@ -2094,6 +2123,7 @@ public:
2094
2123
  ARTPointer children[16];
2095
2124
 
2096
2125
  public:
2126
+ static Node16 *New();
2097
2127
  //! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
2098
2128
  idx_t GetChildPos(uint8_t k) override;
2099
2129
  //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
@@ -2139,6 +2169,7 @@ public:
2139
2169
  ARTPointer children[256];
2140
2170
 
2141
2171
  public:
2172
+ static Node256 *New();
2142
2173
  //! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
2143
2174
  idx_t GetChildPos(uint8_t k) override;
2144
2175
  //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
@@ -2181,11 +2212,13 @@ namespace duckdb {
2181
2212
  class Node4 : public Node {
2182
2213
  public:
2183
2214
  Node4();
2215
+
2184
2216
  uint8_t key[4];
2185
2217
  // Pointers to the child nodes
2186
2218
  ARTPointer children[4];
2187
2219
 
2188
2220
  public:
2221
+ static Node4 *New();
2189
2222
  //! Get position of a byte, returns DConstants::INVALID_INDEX if not exists
2190
2223
  idx_t GetChildPos(uint8_t k) override;
2191
2224
  //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
@@ -2232,6 +2265,7 @@ public:
2232
2265
  ARTPointer children[48];
2233
2266
 
2234
2267
  public:
2268
+ static Node48 *New();
2235
2269
  //! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
2236
2270
  idx_t GetChildPos(uint8_t k) override;
2237
2271
  //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
@@ -2362,6 +2396,12 @@ private:
2362
2396
  bool right_inclusive, idx_t max_count, vector<row_t> &result_ids);
2363
2397
 
2364
2398
  void VerifyExistence(DataChunk &chunk, VerifyExistenceType verify_type, string *err_msg_ptr = nullptr);
2399
+
2400
+ private:
2401
+ //! The estimated ART memory consumption
2402
+ idx_t estimated_art_size;
2403
+ //! The estimated memory consumption of a single key
2404
+ idx_t estimated_key_size;
2365
2405
  };
2366
2406
 
2367
2407
  } // namespace duckdb
@@ -6543,6 +6583,7 @@ Allocator::~Allocator() {
6543
6583
  }
6544
6584
 
6545
6585
  data_ptr_t Allocator::AllocateData(idx_t size) {
6586
+ D_ASSERT(size > 0);
6546
6587
  auto result = allocate_function(private_data.get(), size);
6547
6588
  #ifdef DEBUG
6548
6589
  D_ASSERT(private_data);
@@ -6555,6 +6596,7 @@ void Allocator::FreeData(data_ptr_t pointer, idx_t size) {
6555
6596
  if (!pointer) {
6556
6597
  return;
6557
6598
  }
6599
+ D_ASSERT(size > 0);
6558
6600
  #ifdef DEBUG
6559
6601
  D_ASSERT(private_data);
6560
6602
  private_data->debug_info->FreeData(pointer, size);
@@ -6574,11 +6616,15 @@ data_ptr_t Allocator::ReallocateData(data_ptr_t pointer, idx_t old_size, idx_t s
6574
6616
  return new_pointer;
6575
6617
  }
6576
6618
 
6577
- Allocator &Allocator::DefaultAllocator() {
6578
- static Allocator DEFAULT_ALLOCATOR;
6619
+ shared_ptr<Allocator> &Allocator::DefaultAllocatorReference() {
6620
+ static shared_ptr<Allocator> DEFAULT_ALLOCATOR = make_shared<Allocator>();
6579
6621
  return DEFAULT_ALLOCATOR;
6580
6622
  }
6581
6623
 
6624
+ Allocator &Allocator::DefaultAllocator() {
6625
+ return *DefaultAllocatorReference();
6626
+ }
6627
+
6582
6628
  //===--------------------------------------------------------------------===//
6583
6629
  // Debug Info (extended)
6584
6630
  //===--------------------------------------------------------------------===//
@@ -60543,7 +60589,8 @@ namespace duckdb {
60543
60589
  ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
60544
60590
  const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type,
60545
60591
  DatabaseInstance &db, idx_t block_id, idx_t block_offset)
60546
- : Index(IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type), db(db) {
60592
+ : Index(IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type), db(db),
60593
+ estimated_art_size(0), estimated_key_size(16) {
60547
60594
  if (block_id != DConstants::INVALID_INDEX) {
60548
60595
  tree = Node::Deserialize(*this, block_id, block_offset);
60549
60596
  } else {
@@ -60554,17 +60601,28 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
60554
60601
  switch (types[i]) {
60555
60602
  case PhysicalType::BOOL:
60556
60603
  case PhysicalType::INT8:
60557
- case PhysicalType::INT16:
60558
- case PhysicalType::INT32:
60559
- case PhysicalType::INT64:
60560
- case PhysicalType::INT128:
60561
60604
  case PhysicalType::UINT8:
60605
+ estimated_key_size += sizeof(int8_t);
60606
+ break;
60607
+ case PhysicalType::INT16:
60562
60608
  case PhysicalType::UINT16:
60609
+ estimated_key_size += sizeof(int16_t);
60610
+ break;
60611
+ case PhysicalType::INT32:
60563
60612
  case PhysicalType::UINT32:
60564
- case PhysicalType::UINT64:
60565
60613
  case PhysicalType::FLOAT:
60614
+ estimated_key_size += sizeof(int32_t);
60615
+ break;
60616
+ case PhysicalType::INT64:
60617
+ case PhysicalType::UINT64:
60566
60618
  case PhysicalType::DOUBLE:
60619
+ estimated_key_size += sizeof(int64_t);
60620
+ break;
60621
+ case PhysicalType::INT128:
60622
+ estimated_key_size += sizeof(hugeint_t);
60623
+ break;
60567
60624
  case PhysicalType::VARCHAR:
60625
+ estimated_key_size += 16; // oh well
60568
60626
  break;
60569
60627
  default:
60570
60628
  throw InvalidTypeException(logical_types[i], "Invalid type for index");
@@ -60573,8 +60631,12 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
60573
60631
  }
60574
60632
 
60575
60633
  ART::~ART() {
60634
+ if (estimated_art_size > 0) {
60635
+ BufferManager::GetBufferManager(db).FreeReservedMemory(estimated_art_size);
60636
+ estimated_art_size = 0;
60637
+ }
60576
60638
  if (tree) {
60577
- delete tree;
60639
+ Node::Delete(tree);
60578
60640
  tree = nullptr;
60579
60641
  }
60580
60642
  }
@@ -60777,7 +60839,6 @@ void Construct(vector<Key> &keys, row_t *row_ids, Node *&node, KeySection &key_s
60777
60839
 
60778
60840
  // we reached a leaf, i.e. all the bytes of start_key and end_key match
60779
60841
  if (start_key.len == key_section.depth) {
60780
-
60781
60842
  // end_idx is inclusive
60782
60843
  auto num_row_ids = key_section.end - key_section.start + 1;
60783
60844
 
@@ -60786,14 +60847,7 @@ void Construct(vector<Key> &keys, row_t *row_ids, Node *&node, KeySection &key_s
60786
60847
  throw ConstraintException("New data contains duplicates on indexed column(s)");
60787
60848
  }
60788
60849
 
60789
- // new row ids of this leaf
60790
- auto new_row_ids = unique_ptr<row_t[]>(new row_t[num_row_ids]);
60791
- for (idx_t i = 0; i < num_row_ids; i++) {
60792
- new_row_ids[i] = row_ids[key_section.start + i];
60793
- }
60794
-
60795
- node = new Leaf(start_key, prefix_start, move(new_row_ids), num_row_ids);
60796
-
60850
+ node = Leaf::New(start_key, prefix_start, row_ids + key_section.start, num_row_ids);
60797
60851
  } else { // create a new node and recurse
60798
60852
 
60799
60853
  // we will find at least two child entries of this node, otherwise we'd have reached a leaf
@@ -60881,6 +60935,10 @@ bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
60881
60935
  vector<Key> keys(input.size());
60882
60936
  GenerateKeys(arena_allocator, input, keys);
60883
60937
 
60938
+ idx_t extra_memory = estimated_key_size * input.size();
60939
+ BufferManager::GetBufferManager(db).ReserveMemory(extra_memory);
60940
+ estimated_art_size += extra_memory;
60941
+
60884
60942
  // now insert the elements into the index
60885
60943
  row_ids.Flatten(input.size());
60886
60944
  auto row_identifiers = FlatVector::GetData<row_t>(row_ids);
@@ -60952,7 +61010,7 @@ bool ART::Insert(Node *&node, Key &key, idx_t depth, row_t row_id) {
60952
61010
 
60953
61011
  if (!node) {
60954
61012
  // node is currently empty, create a leaf here with the key
60955
- node = new Leaf(key, depth, row_id);
61013
+ node = Leaf::New(key, depth, row_id);
60956
61014
  return true;
60957
61015
  }
60958
61016
 
@@ -60975,11 +61033,11 @@ bool ART::Insert(Node *&node, Key &key, idx_t depth, row_t row_id) {
60975
61033
  }
60976
61034
  }
60977
61035
 
60978
- Node *new_node = new Node4();
61036
+ Node *new_node = Node4::New();
60979
61037
  new_node->prefix = Prefix(key, depth, new_prefix_length);
60980
61038
  auto key_byte = node->prefix.Reduce(new_prefix_length);
60981
61039
  Node4::InsertChild(new_node, key_byte, node);
60982
- Node *leaf_node = new Leaf(key, depth + new_prefix_length + 1, row_id);
61040
+ Node *leaf_node = Leaf::New(key, depth + new_prefix_length + 1, row_id);
60983
61041
  Node4::InsertChild(new_node, key[depth + new_prefix_length], leaf_node);
60984
61042
  node = new_node;
60985
61043
  return true;
@@ -60990,13 +61048,13 @@ bool ART::Insert(Node *&node, Key &key, idx_t depth, row_t row_id) {
60990
61048
  uint32_t mismatch_pos = node->prefix.KeyMismatchPosition(key, depth);
60991
61049
  if (mismatch_pos != node->prefix.Size()) {
60992
61050
  // Prefix differs, create new node
60993
- Node *new_node = new Node4();
61051
+ Node *new_node = Node4::New();
60994
61052
  new_node->prefix = Prefix(key, depth, mismatch_pos);
60995
61053
  // Break up prefix
60996
61054
  auto key_byte = node->prefix.Reduce(mismatch_pos);
60997
61055
  Node4::InsertChild(new_node, key_byte, node);
60998
61056
 
60999
- Node *leaf_node = new Leaf(key, depth + mismatch_pos + 1, row_id);
61057
+ Node *leaf_node = Leaf::New(key, depth + mismatch_pos + 1, row_id);
61000
61058
  Node4::InsertChild(new_node, key[depth + mismatch_pos], leaf_node);
61001
61059
  node = new_node;
61002
61060
  return true;
@@ -61013,7 +61071,7 @@ bool ART::Insert(Node *&node, Key &key, idx_t depth, row_t row_id) {
61013
61071
  node->ReplaceChildPointer(pos, child);
61014
61072
  return insertion_result;
61015
61073
  }
61016
- Node *new_node = new Leaf(key, depth + 1, row_id);
61074
+ Node *new_node = Leaf::New(key, depth + 1, row_id);
61017
61075
  Node::InsertChild(node, key[depth], new_node);
61018
61076
  return true;
61019
61077
  }
@@ -61028,6 +61086,10 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
61028
61086
  // first resolve the expressions
61029
61087
  ExecuteExpressions(input, expression);
61030
61088
 
61089
+ idx_t released_memory = MinValue<idx_t>(estimated_art_size, estimated_key_size * input.size());
61090
+ BufferManager::GetBufferManager(db).FreeReservedMemory(released_memory);
61091
+ estimated_art_size -= released_memory;
61092
+
61031
61093
  // then generate the keys for the given input
61032
61094
  ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
61033
61095
  vector<Key> keys(expression.size());
@@ -61064,7 +61126,7 @@ void ART::Erase(Node *&node, Key &key, idx_t depth, row_t row_id) {
61064
61126
  auto leaf = static_cast<Leaf *>(node);
61065
61127
  leaf->Remove(row_id);
61066
61128
  if (leaf->count == 0) {
61067
- delete node;
61129
+ Node::Delete(node);
61068
61130
  node = nullptr;
61069
61131
  }
61070
61132
 
@@ -61416,9 +61478,9 @@ BlockPointer ART::Serialize(duckdb::MetaBlockWriter &writer) {
61416
61478
  // Merge ARTs
61417
61479
  //===--------------------------------------------------------------------===//
61418
61480
  bool ART::MergeIndexes(IndexLock &state, Index *other_index) {
61419
-
61420
61481
  auto other_art = (ART *)other_index;
61421
-
61482
+ estimated_art_size += other_art->estimated_art_size;
61483
+ other_art->estimated_art_size = 0;
61422
61484
  if (!this->tree) {
61423
61485
  this->tree = other_art->tree;
61424
61486
  other_art->tree = nullptr;
@@ -61831,45 +61893,104 @@ bool Iterator::LowerBound(Node *node, Key &key, bool inclusive) {
61831
61893
  #include <cstring>
61832
61894
 
61833
61895
  namespace duckdb {
61896
+ idx_t Leaf::GetCapacity() const {
61897
+ return IsInlined() ? 1 : rowids.ptr[0];
61898
+ }
61899
+
61900
+ bool Leaf::IsInlined() const {
61901
+ return count <= 1;
61902
+ }
61903
+
61904
+ row_t Leaf::GetRowId(idx_t index) {
61905
+ D_ASSERT(index < count);
61906
+ if (IsInlined()) {
61907
+ return rowids.inlined;
61908
+ } else {
61909
+ D_ASSERT(rowids.ptr[0] >= count);
61910
+ return rowids.ptr[index + 1];
61911
+ }
61912
+ }
61913
+
61914
+ row_t *Leaf::GetRowIds() {
61915
+ if (IsInlined()) {
61916
+ return &rowids.inlined;
61917
+ } else {
61918
+ return rowids.ptr + 1;
61919
+ }
61920
+ }
61834
61921
 
61835
61922
  Leaf::Leaf(Key &value, uint32_t depth, row_t row_id) : Node(NodeType::NLeaf) {
61836
- capacity = 1;
61837
- row_ids = unique_ptr<row_t[]>(new row_t[capacity]);
61838
- row_ids[0] = row_id;
61839
61923
  count = 1;
61924
+ rowids.inlined = row_id;
61840
61925
  D_ASSERT(value.len >= depth);
61841
61926
  prefix = Prefix(value, depth, value.len - depth);
61842
61927
  }
61843
61928
 
61844
- Leaf::Leaf(Key &value, uint32_t depth, unique_ptr<row_t[]> row_ids_p, idx_t num_elements_p) : Node(NodeType::NLeaf) {
61845
- capacity = num_elements_p;
61846
- row_ids = move(row_ids_p);
61929
+ Leaf::Leaf(Key &value, uint32_t depth, row_t *row_ids_p, idx_t num_elements_p) : Node(NodeType::NLeaf) {
61930
+ D_ASSERT(num_elements_p >= 1);
61931
+ if (num_elements_p == 1) {
61932
+ // we can inline the row ids
61933
+ rowids.inlined = row_ids_p[0];
61934
+ } else {
61935
+ // new row ids of this leaf
61936
+ count = 0;
61937
+ Resize(row_ids_p, num_elements_p, num_elements_p);
61938
+ }
61847
61939
  count = num_elements_p;
61848
61940
  D_ASSERT(value.len >= depth);
61849
61941
  prefix = Prefix(value, depth, value.len - depth);
61850
61942
  }
61851
61943
 
61852
- Leaf::Leaf(unique_ptr<row_t[]> row_ids_p, idx_t num_elements_p, Prefix &prefix_p) : Node(NodeType::NLeaf) {
61853
- capacity = num_elements_p;
61854
- row_ids = move(row_ids_p);
61944
+ Leaf::Leaf(row_t *row_ids_p, idx_t num_elements_p, Prefix &prefix_p) : Node(NodeType::NLeaf) {
61945
+ D_ASSERT(num_elements_p > 1);
61946
+ D_ASSERT(row_ids_p[0] == row_t(num_elements_p)); // first element should contain capacity
61947
+ rowids.ptr = row_ids_p;
61855
61948
  count = num_elements_p;
61856
61949
  prefix = prefix_p;
61857
61950
  }
61858
61951
 
61859
- void Leaf::Insert(row_t row_id) {
61952
+ Leaf::Leaf(row_t row_id, Prefix &prefix_p) : Node(NodeType::NLeaf) {
61953
+ rowids.inlined = row_id;
61954
+ count = 1;
61955
+ prefix = prefix_p;
61956
+ }
61860
61957
 
61861
- // Grow array
61958
+ Leaf::~Leaf() {
61959
+ if (!IsInlined()) {
61960
+ DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
61961
+ count = 0;
61962
+ }
61963
+ }
61964
+
61965
+ row_t *Leaf::Resize(row_t *current_row_ids, uint32_t current_count, idx_t new_capacity) {
61966
+ D_ASSERT(new_capacity >= current_count);
61967
+ auto new_allocation = AllocateArray<row_t>(new_capacity + 1);
61968
+ new_allocation[0] = new_capacity;
61969
+ auto new_row_ids = new_allocation + 1;
61970
+ memcpy(new_row_ids, current_row_ids, current_count * sizeof(row_t));
61971
+ if (!IsInlined()) {
61972
+ // delete the old data
61973
+ DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
61974
+ }
61975
+ // set up the new pointers
61976
+ rowids.ptr = new_allocation;
61977
+ return new_row_ids;
61978
+ }
61979
+
61980
+ void Leaf::Insert(row_t row_id) {
61981
+ auto capacity = GetCapacity();
61982
+ row_t *row_ids = GetRowIds();
61983
+ D_ASSERT(count <= capacity);
61862
61984
  if (count == capacity) {
61863
- auto new_row_id = unique_ptr<row_t[]>(new row_t[capacity * 2]);
61864
- memcpy(new_row_id.get(), row_ids.get(), capacity * sizeof(row_t));
61865
- capacity *= 2;
61866
- row_ids = move(new_row_id);
61985
+ // Grow array
61986
+ row_ids = Resize(row_ids, count, capacity * 2);
61867
61987
  }
61868
61988
  row_ids[count++] = row_id;
61869
61989
  }
61870
61990
 
61871
61991
  void Leaf::Remove(row_t row_id) {
61872
61992
  idx_t entry_offset = DConstants::INVALID_INDEX;
61993
+ row_t *row_ids = GetRowIds();
61873
61994
  for (idx_t i = 0; i < count; i++) {
61874
61995
  if (row_ids[i] == row_id) {
61875
61996
  entry_offset = i;
@@ -61879,33 +62000,48 @@ void Leaf::Remove(row_t row_id) {
61879
62000
  if (entry_offset == DConstants::INVALID_INDEX) {
61880
62001
  return;
61881
62002
  }
62003
+ if (IsInlined()) {
62004
+ D_ASSERT(count == 1);
62005
+ count--;
62006
+ return;
62007
+ }
61882
62008
  count--;
62009
+ if (count == 1) {
62010
+ // after erasing we can now inline the leaf
62011
+ // delete the pointer and inline the remaining rowid
62012
+ auto remaining_row_id = row_ids[0] == row_id ? row_ids[1] : row_ids[0];
62013
+ DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
62014
+ rowids.inlined = remaining_row_id;
62015
+ return;
62016
+ }
62017
+ auto capacity = GetCapacity();
61883
62018
  if (capacity > 2 && count < capacity / 2) {
61884
62019
  // Shrink array, if less than half full
61885
- auto new_row_id = unique_ptr<row_t[]>(new row_t[capacity / 2]);
61886
- memcpy(new_row_id.get(), row_ids.get(), entry_offset * sizeof(row_t));
61887
- memcpy(new_row_id.get() + entry_offset, row_ids.get() + entry_offset + 1,
61888
- (count - entry_offset) * sizeof(row_t));
61889
- capacity /= 2;
61890
- row_ids = move(new_row_id);
62020
+ auto new_capacity = capacity / 2;
62021
+ auto new_allocation = AllocateArray<row_t>(new_capacity + 1);
62022
+ new_allocation[0] = new_capacity;
62023
+ auto new_row_ids = new_allocation + 1;
62024
+ memcpy(new_row_ids, row_ids, entry_offset * sizeof(row_t));
62025
+ memcpy(new_row_ids + entry_offset, row_ids + entry_offset + 1, (count - entry_offset) * sizeof(row_t));
62026
+ DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
62027
+ rowids.ptr = new_allocation;
61891
62028
  } else {
61892
62029
  // Copy the rest
61893
- memmove(row_ids.get() + entry_offset, row_ids.get() + entry_offset + 1, (count - entry_offset) * sizeof(row_t));
62030
+ memmove(row_ids + entry_offset, row_ids + entry_offset + 1, (count - entry_offset) * sizeof(row_t));
61894
62031
  }
61895
62032
  }
61896
62033
 
61897
62034
  string Leaf::ToString(Node *node) {
61898
-
61899
62035
  Leaf *leaf = (Leaf *)node;
61900
62036
  string str = "Leaf: [";
62037
+ auto row_ids = leaf->GetRowIds();
61901
62038
  for (idx_t i = 0; i < leaf->count; i++) {
61902
- str += i == 0 ? to_string(leaf->row_ids[i]) : ", " + to_string(leaf->row_ids[i]);
62039
+ str += i == 0 ? to_string(row_ids[i]) : ", " + to_string(row_ids[i]);
61903
62040
  }
61904
62041
  return str + "]";
61905
62042
  }
61906
62043
 
61907
62044
  void Leaf::Merge(Node *&l_node, Node *&r_node) {
61908
-
61909
62045
  Leaf *l_n = (Leaf *)l_node;
61910
62046
  Leaf *r_n = (Leaf *)r_node;
61911
62047
 
@@ -61923,8 +62059,9 @@ BlockPointer Leaf::Serialize(duckdb::MetaBlockWriter &writer) {
61923
62059
  prefix.Serialize(writer);
61924
62060
  // Write Row Ids
61925
62061
  // Length
61926
- writer.Write(count);
62062
+ writer.Write<uint16_t>(count);
61927
62063
  // Actual Row Ids
62064
+ auto row_ids = GetRowIds();
61928
62065
  for (idx_t i = 0; i < count; i++) {
61929
62066
  writer.Write(row_ids[i]);
61930
62067
  }
@@ -61935,11 +62072,19 @@ Leaf *Leaf::Deserialize(MetaBlockReader &reader) {
61935
62072
  Prefix prefix;
61936
62073
  prefix.Deserialize(reader);
61937
62074
  auto num_elements = reader.Read<uint16_t>();
61938
- auto elements = unique_ptr<row_t[]>(new row_t[num_elements]);
61939
- for (idx_t i = 0; i < num_elements; i++) {
61940
- elements[i] = reader.Read<row_t>();
62075
+ if (num_elements == 1) {
62076
+ // inlined
62077
+ auto element = reader.Read<row_t>();
62078
+ return Leaf::New(element, prefix);
62079
+ } else {
62080
+ // non-inlined
62081
+ auto elements = AllocateArray<row_t>(num_elements + 1);
62082
+ elements[0] = num_elements;
62083
+ for (idx_t i = 0; i < num_elements; i++) {
62084
+ elements[i + 1] = reader.Read<row_t>();
62085
+ }
62086
+ return Leaf::New(elements, num_elements, prefix);
61941
62087
  }
61942
- return new Leaf(move(elements), num_elements, prefix);
61943
62088
  }
61944
62089
 
61945
62090
  } // namespace duckdb
@@ -62074,25 +62219,78 @@ NodeType Node::GetTypeBySize(idx_t size) {
62074
62219
  }
62075
62220
 
62076
62221
  void Node::New(NodeType &type, Node *&node) {
62077
-
62078
62222
  switch (type) {
62079
62223
  case NodeType::N4:
62080
- node = new Node4();
62224
+ node = (Node *)Node4::New();
62081
62225
  return;
62082
62226
  case NodeType::N16:
62083
- node = new Node16();
62227
+ node = (Node *)Node16::New();
62084
62228
  return;
62085
62229
  case NodeType::N48:
62086
- node = new Node48();
62230
+ node = (Node *)Node48::New();
62087
62231
  return;
62088
62232
  case NodeType::N256:
62089
- node = new Node256();
62233
+ node = (Node *)Node256::New();
62090
62234
  return;
62091
62235
  default:
62092
62236
  throw InternalException("Unrecognized type for new node creation!");
62093
62237
  }
62094
62238
  }
62095
62239
 
62240
+ Node4 *Node4::New() {
62241
+ return AllocateObject<Node4>();
62242
+ }
62243
+
62244
+ Node16 *Node16::New() {
62245
+ return AllocateObject<Node16>();
62246
+ }
62247
+
62248
+ Node48 *Node48::New() {
62249
+ return AllocateObject<Node48>();
62250
+ }
62251
+
62252
+ Node256 *Node256::New() {
62253
+ return AllocateObject<Node256>();
62254
+ }
62255
+
62256
+ Leaf *Leaf::New(Key &value, uint32_t depth, row_t row_id) {
62257
+ return AllocateObject<Leaf>(value, depth, row_id);
62258
+ }
62259
+
62260
+ Leaf *Leaf::New(Key &value, uint32_t depth, row_t *row_ids, idx_t num_elements) {
62261
+ return AllocateObject<Leaf>(value, depth, row_ids, num_elements);
62262
+ }
62263
+
62264
+ Leaf *Leaf::New(row_t *row_ids, idx_t num_elements, Prefix &prefix) {
62265
+ return AllocateObject<Leaf>(row_ids, num_elements, prefix);
62266
+ }
62267
+
62268
+ Leaf *Leaf::New(row_t row_id, Prefix &prefix) {
62269
+ return AllocateObject<Leaf>(row_id, prefix);
62270
+ }
62271
+
62272
+ void Node::Delete(Node *ptr) {
62273
+ switch (ptr->type) {
62274
+ case NodeType::NLeaf:
62275
+ DestroyObject((Leaf *)ptr);
62276
+ break;
62277
+ case NodeType::N4:
62278
+ DestroyObject((Node4 *)ptr);
62279
+ break;
62280
+ case NodeType::N16:
62281
+ DestroyObject((Node16 *)ptr);
62282
+ break;
62283
+ case NodeType::N48:
62284
+ DestroyObject((Node48 *)ptr);
62285
+ break;
62286
+ case NodeType::N256:
62287
+ DestroyObject((Node256 *)ptr);
62288
+ break;
62289
+ default:
62290
+ throw InternalException("eek");
62291
+ }
62292
+ }
62293
+
62096
62294
  string Node::ToString(ART &art) {
62097
62295
 
62098
62296
  string str = "Node";
@@ -62133,7 +62331,7 @@ BlockPointer Node::SerializeInternal(ART &art, duckdb::MetaBlockWriter &writer,
62133
62331
  // Write Node Type
62134
62332
  writer.Write(type);
62135
62333
  // Write count
62136
- writer.Write(count);
62334
+ writer.Write<uint16_t>(count);
62137
62335
  // Write Prefix
62138
62336
  prefix.Serialize(writer);
62139
62337
  // Write Key values
@@ -62190,19 +62388,19 @@ Node *Node::Deserialize(ART &art, idx_t block_id, idx_t offset) {
62190
62388
  case NodeType::NLeaf:
62191
62389
  return Leaf::Deserialize(reader);
62192
62390
  case NodeType::N4: {
62193
- deserialized_node = (Node *)new Node4();
62391
+ deserialized_node = (Node *)Node4::New();
62194
62392
  break;
62195
62393
  }
62196
62394
  case NodeType::N16: {
62197
- deserialized_node = (Node *)new Node16();
62395
+ deserialized_node = (Node *)Node16::New();
62198
62396
  break;
62199
62397
  }
62200
62398
  case NodeType::N48: {
62201
- deserialized_node = (Node *)new Node48();
62399
+ deserialized_node = (Node *)Node48::New();
62202
62400
  break;
62203
62401
  }
62204
62402
  case NodeType::N256: {
62205
- deserialized_node = (Node *)new Node256();
62403
+ deserialized_node = (Node *)Node256::New();
62206
62404
  break;
62207
62405
  }
62208
62406
  }
@@ -62211,7 +62409,6 @@ Node *Node::Deserialize(ART &art, idx_t block_id, idx_t offset) {
62211
62409
  }
62212
62410
 
62213
62411
  void UpdateParentsOfNodes(Node *&l_node, Node *&r_node, ParentsOfNodes &parents) {
62214
-
62215
62412
  if (parents.l_parent) {
62216
62413
  parents.l_parent->ReplaceChildPointer(parents.l_pos, l_node);
62217
62414
  }
@@ -62255,7 +62452,6 @@ bool Merge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
62255
62452
  }
62256
62453
 
62257
62454
  bool ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
62258
-
62259
62455
  auto &l_node = info.l_node;
62260
62456
  auto &r_node = info.r_node;
62261
62457
  Node *null_parent = nullptr;
@@ -62308,7 +62504,7 @@ bool ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &paren
62308
62504
  // prefixes differ, create new node and insert both nodes as children
62309
62505
 
62310
62506
  // create new node
62311
- Node *new_node = new Node4();
62507
+ Node *new_node = Node4::New();
62312
62508
  new_node->prefix = Prefix(l_node->prefix, mismatch_pos);
62313
62509
 
62314
62510
  // insert l_node, break up prefix of l_node
@@ -62435,7 +62631,7 @@ void Node16::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
62435
62631
  n->count++;
62436
62632
  } else {
62437
62633
  // Grow to Node48
62438
- auto new_node = new Node48();
62634
+ auto new_node = Node48::New();
62439
62635
  for (idx_t i = 0; i < node->count; i++) {
62440
62636
  new_node->child_index[n->key[i]] = i;
62441
62637
  new_node->children[i] = n->children[i];
@@ -62443,7 +62639,7 @@ void Node16::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
62443
62639
  }
62444
62640
  new_node->prefix = move(n->prefix);
62445
62641
  new_node->count = node->count;
62446
- delete node;
62642
+ Node::Delete(node);
62447
62643
  node = new_node;
62448
62644
 
62449
62645
  Node48::InsertChild(node, key_byte, new_child);
@@ -62470,14 +62666,14 @@ void Node16::EraseChild(Node *&node, int pos, ART &art) {
62470
62666
 
62471
62667
  if (node->count <= 3) {
62472
62668
  // Shrink node
62473
- auto new_node = new Node4();
62669
+ auto new_node = Node4::New();
62474
62670
  for (unsigned i = 0; i < n->count; i++) {
62475
62671
  new_node->key[new_node->count] = n->key[i];
62476
62672
  new_node->children[new_node->count++] = n->children[i];
62477
62673
  n->children[i] = nullptr;
62478
62674
  }
62479
62675
  new_node->prefix = move(n->prefix);
62480
- delete node;
62676
+ Node::Delete(node);
62481
62677
  node = new_node;
62482
62678
  }
62483
62679
  }
@@ -62570,7 +62766,7 @@ void Node256::EraseChild(Node *&node, int pos, ART &art) {
62570
62766
  n->children[pos].Reset();
62571
62767
  n->count--;
62572
62768
  if (node->count <= 36) {
62573
- auto new_node = new Node48();
62769
+ auto new_node = Node48::New();
62574
62770
  new_node->prefix = move(n->prefix);
62575
62771
  for (idx_t i = 0; i < 256; i++) {
62576
62772
  if (n->children[i]) {
@@ -62580,7 +62776,7 @@ void Node256::EraseChild(Node *&node, int pos, ART &art) {
62580
62776
  new_node->count++;
62581
62777
  }
62582
62778
  }
62583
- delete node;
62779
+ Node::Delete(node);
62584
62780
  node = new_node;
62585
62781
  }
62586
62782
  }
@@ -62682,7 +62878,7 @@ void Node4::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
62682
62878
  n->count++;
62683
62879
  } else {
62684
62880
  // Grow to Node16
62685
- auto new_node = new Node16();
62881
+ auto new_node = Node16::New();
62686
62882
  new_node->count = 4;
62687
62883
  new_node->prefix = move(node->prefix);
62688
62884
  for (idx_t i = 0; i < 4; i++) {
@@ -62691,7 +62887,7 @@ void Node4::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
62691
62887
  n->children[i] = nullptr;
62692
62888
  }
62693
62889
  // Delete old node and replace it with new Node16
62694
- delete node;
62890
+ Node::Delete(node);
62695
62891
  node = new_node;
62696
62892
  Node16::InsertChild(node, key_byte, new_child);
62697
62893
  }
@@ -62719,7 +62915,7 @@ void Node4::EraseChild(Node *&node, int pos, ART &art) {
62719
62915
  // concatenate prefixes
62720
62916
  child_ref->prefix.Concatenate(n->key[0], node->prefix);
62721
62917
  n->children[0] = nullptr;
62722
- delete node;
62918
+ Node::Delete(node);
62723
62919
  node = child_ref;
62724
62920
  }
62725
62921
  }
@@ -62824,7 +63020,7 @@ void Node48::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
62824
63020
  n->count++;
62825
63021
  } else {
62826
63022
  // Grow to Node256
62827
- auto new_node = new Node256();
63023
+ auto new_node = Node256::New();
62828
63024
  for (idx_t i = 0; i < 256; i++) {
62829
63025
  if (n->child_index[i] != Node::EMPTY_MARKER) {
62830
63026
  new_node->children[i] = n->children[n->child_index[i]];
@@ -62833,7 +63029,7 @@ void Node48::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
62833
63029
  }
62834
63030
  new_node->count = n->count;
62835
63031
  new_node->prefix = move(n->prefix);
62836
- delete node;
63032
+ Node::Delete(node);
62837
63033
  node = new_node;
62838
63034
  Node256::InsertChild(node, key_byte, new_child);
62839
63035
  }
@@ -62845,7 +63041,7 @@ void Node48::EraseChild(Node *&node, int pos, ART &art) {
62845
63041
  n->child_index[pos] = Node::EMPTY_MARKER;
62846
63042
  n->count--;
62847
63043
  if (node->count <= 12) {
62848
- auto new_node = new Node16();
63044
+ auto new_node = Node16::New();
62849
63045
  new_node->prefix = move(n->prefix);
62850
63046
  for (idx_t i = 0; i < 256; i++) {
62851
63047
  if (n->child_index[i] != Node::EMPTY_MARKER) {
@@ -62854,7 +63050,7 @@ void Node48::EraseChild(Node *&node, int pos, ART &art) {
62854
63050
  n->children[n->child_index[i]] = nullptr;
62855
63051
  }
62856
63052
  }
62857
- delete node;
63053
+ Node::Delete(node);
62858
63054
  node = new_node;
62859
63055
  }
62860
63056
  }
@@ -62889,12 +63085,38 @@ uint32_t Prefix::Size() const {
62889
63085
  return size;
62890
63086
  }
62891
63087
 
63088
+ bool Prefix::IsInlined() const {
63089
+ return size <= PREFIX_INLINE_BYTES;
63090
+ }
63091
+
63092
+ uint8_t *Prefix::GetPrefixData() {
63093
+ return IsInlined() ? &value.inlined[0] : value.ptr;
63094
+ }
63095
+
63096
+ const uint8_t *Prefix::GetPrefixData() const {
63097
+ return IsInlined() ? &value.inlined[0] : value.ptr;
63098
+ }
63099
+
63100
+ uint8_t *Prefix::AllocatePrefix(uint32_t size) {
63101
+ Destroy();
63102
+
63103
+ this->size = size;
63104
+ uint8_t *prefix;
63105
+ if (IsInlined()) {
63106
+ prefix = &value.inlined[0];
63107
+ } else {
63108
+ // allocate new prefix
63109
+ value.ptr = AllocateArray<uint8_t>(size);
63110
+ prefix = value.ptr;
63111
+ }
63112
+ return prefix;
63113
+ }
63114
+
62892
63115
  Prefix::Prefix() : size(0) {
62893
63116
  }
62894
63117
 
62895
- Prefix::Prefix(Key &key, uint32_t depth, uint32_t size) : size(size) {
62896
- // allocate new prefix
62897
- prefix = unique_ptr<uint8_t[]>(new uint8_t[size]);
63118
+ Prefix::Prefix(Key &key, uint32_t depth, uint32_t size) : size(0) {
63119
+ auto prefix = AllocatePrefix(size);
62898
63120
 
62899
63121
  // copy key to prefix
62900
63122
  idx_t prefix_idx = 0;
@@ -62903,43 +63125,72 @@ Prefix::Prefix(Key &key, uint32_t depth, uint32_t size) : size(size) {
62903
63125
  }
62904
63126
  }
62905
63127
 
62906
- Prefix::Prefix(Prefix &other_prefix, uint32_t size) : size(size) {
62907
- // allocate new prefix
62908
- prefix = unique_ptr<uint8_t[]>(new uint8_t[size]);
63128
+ Prefix::Prefix(Prefix &other_prefix, uint32_t size) : size(0) {
63129
+ auto prefix = AllocatePrefix(size);
62909
63130
 
62910
63131
  // copy key to Prefix
63132
+ auto other_data = other_prefix.GetPrefixData();
62911
63133
  for (idx_t i = 0; i < size; i++) {
62912
- prefix[i] = other_prefix[i];
63134
+ prefix[i] = other_data[i];
63135
+ }
63136
+ }
63137
+
63138
+ Prefix::~Prefix() {
63139
+ Destroy();
63140
+ }
63141
+
63142
+ void Prefix::Destroy() {
63143
+ if (!IsInlined()) {
63144
+ DeleteArray<uint8_t>(value.ptr, size);
63145
+ size = 0;
62913
63146
  }
62914
63147
  }
62915
63148
 
62916
63149
  uint8_t &Prefix::operator[](idx_t idx) {
62917
63150
  D_ASSERT(idx < Size());
62918
- return prefix[idx];
63151
+ return GetPrefixData()[idx];
62919
63152
  }
62920
63153
 
62921
63154
  Prefix &Prefix::operator=(const Prefix &src) {
62922
- // allocate new prefix
62923
- prefix = unique_ptr<uint8_t[]>(new uint8_t[src.size]);
63155
+ auto prefix = AllocatePrefix(src.size);
62924
63156
 
62925
63157
  // copy prefix
63158
+ auto src_prefix = src.GetPrefixData();
62926
63159
  for (idx_t i = 0; i < src.size; i++) {
62927
- prefix[i] = src.prefix[i];
63160
+ prefix[i] = src_prefix[i];
62928
63161
  }
62929
63162
  size = src.size;
62930
63163
  return *this;
62931
63164
  }
62932
63165
 
62933
63166
  Prefix &Prefix::operator=(Prefix &&other) noexcept {
62934
- prefix = move(other.prefix);
62935
- size = other.size;
63167
+ std::swap(size, other.size);
63168
+ std::swap(value, other.value);
62936
63169
  return *this;
62937
63170
  }
62938
63171
 
63172
+ void Prefix::Overwrite(uint32_t new_size, uint8_t *data) {
63173
+ if (new_size <= PREFIX_INLINE_BYTES) {
63174
+ // new entry would be inlined
63175
+ // inline the data and destroy the pointer
63176
+ auto prefix = AllocatePrefix(new_size);
63177
+ for (idx_t i = 0; i < new_size; i++) {
63178
+ prefix[i] = data[i];
63179
+ }
63180
+ DeleteArray<uint8_t>(data, new_size);
63181
+ } else {
63182
+ // new entry would not be inlined
63183
+ // take over the data directly
63184
+ Destroy();
63185
+ size = new_size;
63186
+ value.ptr = data;
63187
+ }
63188
+ }
63189
+
62939
63190
  void Prefix::Concatenate(uint8_t key, Prefix &other) {
62940
63191
  auto new_length = size + 1 + other.size;
62941
63192
  // have to allocate space in our prefix array
62942
- unique_ptr<uint8_t[]> new_prefix = unique_ptr<uint8_t[]>(new uint8_t[new_length]);
63193
+ auto new_prefix = AllocateArray<uint8_t>(new_length);
62943
63194
  idx_t new_prefix_idx = 0;
62944
63195
  // 1) add the to-be deleted node's prefix
62945
63196
  for (uint32_t i = 0; i < other.size; i++) {
@@ -62948,42 +63199,46 @@ void Prefix::Concatenate(uint8_t key, Prefix &other) {
62948
63199
  // 2) now move the current key as part of the prefix
62949
63200
  new_prefix[new_prefix_idx++] = key;
62950
63201
  // 3) move the existing prefix (if any)
63202
+ auto prefix = GetPrefixData();
62951
63203
  for (uint32_t i = 0; i < size; i++) {
62952
63204
  new_prefix[new_prefix_idx++] = prefix[i];
62953
63205
  }
62954
- prefix = move(new_prefix);
62955
- size = new_length;
63206
+ Overwrite(new_length, new_prefix);
62956
63207
  }
62957
63208
 
62958
63209
  uint8_t Prefix::Reduce(uint32_t n) {
62959
63210
  auto new_size = size - n - 1;
62960
- auto new_prefix = unique_ptr<uint8_t[]>(new uint8_t[new_size]);
63211
+ auto prefix = GetPrefixData();
62961
63212
  auto key = prefix[n];
63213
+ if (new_size == 0) {
63214
+ Destroy();
63215
+ size = 0;
63216
+ return key;
63217
+ }
63218
+ auto new_prefix = AllocateArray<uint8_t>(new_size);
62962
63219
  for (idx_t i = 0; i < new_size; i++) {
62963
63220
  new_prefix[i] = prefix[i + n + 1];
62964
63221
  }
62965
- prefix = move(new_prefix);
62966
- size = new_size;
63222
+ Overwrite(new_size, new_prefix);
62967
63223
  return key;
62968
63224
  }
62969
63225
 
62970
63226
  void Prefix::Serialize(duckdb::MetaBlockWriter &writer) {
62971
63227
  writer.Write(size);
62972
- for (idx_t i = 0; i < size; i++) {
62973
- writer.Write(prefix[i]);
62974
- }
63228
+ auto prefix = GetPrefixData();
63229
+ writer.WriteData(prefix, size);
62975
63230
  }
62976
63231
 
62977
63232
  void Prefix::Deserialize(duckdb::MetaBlockReader &reader) {
62978
- size = reader.Read<uint32_t>();
62979
- prefix = unique_ptr<uint8_t[]>(new uint8_t[size]);
62980
- for (idx_t i = 0; i < size; i++) {
62981
- prefix[i] = reader.Read<uint8_t>();
62982
- }
63233
+ auto prefix_size = reader.Read<uint32_t>();
63234
+ auto prefix = AllocatePrefix(prefix_size);
63235
+ this->size = prefix_size;
63236
+ reader.ReadData(prefix, size);
62983
63237
  }
62984
63238
 
62985
63239
  uint32_t Prefix::KeyMismatchPosition(Key &key, uint64_t depth) {
62986
63240
  uint64_t pos;
63241
+ auto prefix = GetPrefixData();
62987
63242
  for (pos = 0; pos < size; pos++) {
62988
63243
  if (key[depth + pos] != prefix[pos]) {
62989
63244
  return pos;
@@ -62993,9 +63248,10 @@ uint32_t Prefix::KeyMismatchPosition(Key &key, uint64_t depth) {
62993
63248
  }
62994
63249
 
62995
63250
  uint32_t Prefix::MismatchPosition(Prefix &other) {
62996
-
63251
+ auto prefix = GetPrefixData();
63252
+ auto other_data = other.GetPrefixData();
62997
63253
  for (idx_t i = 0; i < size; i++) {
62998
- if (prefix[i] != other[i]) {
63254
+ if (prefix[i] != other_data[i]) {
62999
63255
  return i;
63000
63256
  }
63001
63257
  }
@@ -63009,7 +63265,7 @@ namespace duckdb {
63009
63265
  SwizzleablePointer::~SwizzleablePointer() {
63010
63266
  if (pointer) {
63011
63267
  if (!IsSwizzled()) {
63012
- delete (Node *)pointer;
63268
+ Node::Delete((Node *)pointer);
63013
63269
  }
63014
63270
  }
63015
63271
  }
@@ -63071,7 +63327,7 @@ bool SwizzleablePointer::IsSwizzled() {
63071
63327
  void SwizzleablePointer::Reset() {
63072
63328
  if (pointer) {
63073
63329
  if (!IsSwizzled()) {
63074
- delete (Node *)pointer;
63330
+ Node::Delete((Node *)pointer);
63075
63331
  }
63076
63332
  }
63077
63333
  *this = nullptr;
@@ -81125,17 +81381,21 @@ bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_ch
81125
81381
 
81126
81382
  namespace duckdb {
81127
81383
 
81128
- CSVBuffer::CSVBuffer(idx_t buffer_size_p, CSVFileHandle &file_handle) : first_buffer(true) {
81129
- buffer = unique_ptr<char[]>(new char[buffer_size_p]);
81130
- actual_size = file_handle.Read(buffer.get(), buffer_size_p);
81384
+ CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle)
81385
+ : context(context), first_buffer(true) {
81386
+ this->handle = AllocateBuffer(buffer_size_p);
81387
+
81388
+ auto buffer = Ptr();
81389
+ actual_size = file_handle.Read(buffer, buffer_size_p);
81131
81390
  if (actual_size >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') {
81132
81391
  start_position += 3;
81133
81392
  }
81134
81393
  last_buffer = file_handle.FinishedReading();
81135
81394
  }
81136
81395
 
81137
- CSVBuffer::CSVBuffer(unique_ptr<char[]> buffer_p, idx_t buffer_size_p, idx_t actual_size_p, bool final_buffer)
81138
- : buffer(move(buffer_p)), actual_size(actual_size_p), last_buffer(final_buffer) {
81396
+ CSVBuffer::CSVBuffer(ClientContext &context, BufferHandle buffer_p, idx_t buffer_size_p, idx_t actual_size_p,
81397
+ bool final_buffer)
81398
+ : context(context), handle(move(buffer_p)), actual_size(actual_size_p), last_buffer(final_buffer) {
81139
81399
  }
81140
81400
 
81141
81401
  unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t set_buffer_size) {
@@ -81144,14 +81404,18 @@ unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t set_buff
81144
81404
  return nullptr;
81145
81405
  }
81146
81406
 
81147
- auto next_buffer = unique_ptr<char[]>(new char[set_buffer_size]);
81148
-
81149
- idx_t next_buffer_actual_size = file_handle.Read(next_buffer.get(), set_buffer_size);
81407
+ auto next_buffer = AllocateBuffer(set_buffer_size);
81408
+ idx_t next_buffer_actual_size = file_handle.Read(next_buffer.Ptr(), set_buffer_size);
81150
81409
 
81151
- return make_unique<CSVBuffer>(move(next_buffer), set_buffer_size, next_buffer_actual_size,
81410
+ return make_unique<CSVBuffer>(context, move(next_buffer), set_buffer_size, next_buffer_actual_size,
81152
81411
  file_handle.FinishedReading());
81153
81412
  }
81154
81413
 
81414
+ BufferHandle CSVBuffer::AllocateBuffer(idx_t buffer_size) {
81415
+ auto &buffer_manager = BufferManager::GetBufferManager(context);
81416
+ return buffer_manager.Allocate(MaxValue<idx_t>(Storage::BLOCK_SIZE, buffer_size));
81417
+ }
81418
+
81155
81419
  idx_t CSVBuffer::GetBufferSize() {
81156
81420
  return actual_size;
81157
81421
  }
@@ -81202,6 +81466,9 @@ static bool ParseBoolean(const Value &value, const string &loption) {
81202
81466
  }
81203
81467
 
81204
81468
  static string ParseString(const Value &value, const string &loption) {
81469
+ if (value.IsNull()) {
81470
+ return string();
81471
+ }
81205
81472
  if (value.type().id() == LogicalTypeId::LIST) {
81206
81473
  auto &children = ListValue::GetChildren(value);
81207
81474
  if (children.size() != 1) {
@@ -81356,6 +81623,11 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
81356
81623
  ignore_errors = ParseBoolean(value, loption);
81357
81624
  } else if (loption == "union_by_name") {
81358
81625
  union_by_name = ParseBoolean(value, loption);
81626
+ } else if (loption == "buffer_size") {
81627
+ buffer_size = ParseInteger(value, loption);
81628
+ if (buffer_size == 0) {
81629
+ throw InvalidInputException("Buffer Size option must be higher than 0");
81630
+ }
81359
81631
  } else {
81360
81632
  throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
81361
81633
  }
@@ -81469,34 +81741,38 @@ struct CSVBufferRead {
81469
81741
 
81470
81742
  const char &operator[](size_t i) const {
81471
81743
  if (i < buffer->GetBufferSize()) {
81472
- return buffer->buffer[i];
81744
+ auto buffer_ptr = buffer->Ptr();
81745
+ return buffer_ptr[i];
81473
81746
  }
81474
- return next_buffer->buffer[i - buffer->GetBufferSize()];
81747
+ auto next_ptr = next_buffer->Ptr();
81748
+ return next_ptr[i - buffer->GetBufferSize()];
81475
81749
  }
81476
81750
 
81477
81751
  string_t GetValue(idx_t start_buffer, idx_t position_buffer, idx_t offset) {
81478
81752
  idx_t length = position_buffer - start_buffer - offset;
81479
81753
  // 1) It's all in the current buffer
81480
81754
  if (start_buffer + length <= buffer->GetBufferSize()) {
81481
- auto buffer_ptr = buffer->buffer.get();
81755
+ auto buffer_ptr = buffer->Ptr();
81482
81756
  return string_t(buffer_ptr + start_buffer, length);
81483
81757
  } else if (start_buffer >= buffer->GetBufferSize()) {
81484
81758
  // 2) It's all in the next buffer
81485
81759
  D_ASSERT(next_buffer);
81486
81760
  D_ASSERT(next_buffer->GetBufferSize() >= length + (start_buffer - buffer->GetBufferSize()));
81487
- auto buffer_ptr = next_buffer->buffer.get();
81761
+ auto buffer_ptr = next_buffer->Ptr();
81488
81762
  return string_t(buffer_ptr + (start_buffer - buffer->GetBufferSize()), length);
81489
81763
  } else {
81490
81764
  // 3) It starts in the current buffer and ends in the next buffer
81491
81765
  D_ASSERT(next_buffer);
81492
81766
  auto intersection = unique_ptr<char[]>(new char[length]);
81493
81767
  idx_t cur_pos = 0;
81768
+ auto buffer_ptr = buffer->Ptr();
81494
81769
  for (idx_t i = start_buffer; i < buffer->GetBufferSize(); i++) {
81495
- intersection[cur_pos++] = buffer->buffer[i];
81770
+ intersection[cur_pos++] = buffer_ptr[i];
81496
81771
  }
81497
81772
  idx_t nxt_buffer_pos = 0;
81773
+ auto next_buffer_ptr = next_buffer->Ptr();
81498
81774
  for (; cur_pos < length; cur_pos++) {
81499
- intersection[cur_pos] = next_buffer->buffer[nxt_buffer_pos++];
81775
+ intersection[cur_pos] = next_buffer_ptr[nxt_buffer_pos++];
81500
81776
  }
81501
81777
  intersections.emplace_back(move(intersection));
81502
81778
  return string_t(intersections.back().get(), length);
@@ -81809,10 +82085,11 @@ normal : {
81809
82085
  /* state: normal parsing state */
81810
82086
  // this state parses the remainder of a non-quoted value until we reach a delimiter or newline
81811
82087
  for (; position_buffer < end_buffer; position_buffer++) {
81812
- if ((*buffer)[position_buffer] == options.delimiter[0]) {
82088
+ auto c = (*buffer)[position_buffer];
82089
+ if (c == options.delimiter[0]) {
81813
82090
  // delimiter: end the value and add it to the chunk
81814
82091
  goto add_value;
81815
- } else if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
82092
+ } else if (StringUtil::CharacterIsNewline(c)) {
81816
82093
  // newline: add row
81817
82094
  D_ASSERT(try_add_line || column == insert_chunk.ColumnCount() - 1);
81818
82095
  goto add_row;
@@ -81882,10 +82159,11 @@ in_quotes:
81882
82159
  has_quotes = true;
81883
82160
  position_buffer++;
81884
82161
  for (; position_buffer < end_buffer; position_buffer++) {
81885
- if ((*buffer)[position_buffer] == options.quote[0]) {
82162
+ auto c = (*buffer)[position_buffer];
82163
+ if (c == options.quote[0]) {
81886
82164
  // quote: move to unquoted state
81887
82165
  goto unquote;
81888
- } else if ((*buffer)[position_buffer] == options.escape[0]) {
82166
+ } else if (c == options.escape[0]) {
81889
82167
  // escape: store the escaped position and move to handle_escape state
81890
82168
  escape_positions.push_back(position_buffer - start_buffer);
81891
82169
  goto handle_escape;
@@ -81907,7 +82185,7 @@ in_quotes:
81907
82185
  goto in_quotes;
81908
82186
  }
81909
82187
 
81910
- unquote:
82188
+ unquote : {
81911
82189
  /* state: unquote: this state handles the state directly after we unquote*/
81912
82190
  //
81913
82191
  // in this state we expect either another quote (entering the quoted state again, and escaping the quote)
@@ -81917,16 +82195,16 @@ unquote:
81917
82195
  offset = 1;
81918
82196
  goto final_state;
81919
82197
  }
81920
- if ((*buffer)[position_buffer] == options.quote[0] &&
81921
- (options.escape.empty() || options.escape[0] == options.quote[0])) {
82198
+ auto c = (*buffer)[position_buffer];
82199
+ if (c == options.quote[0] && (options.escape.empty() || options.escape[0] == options.quote[0])) {
81922
82200
  // escaped quote, return to quoted state and store escape position
81923
82201
  escape_positions.push_back(position_buffer - start_buffer);
81924
82202
  goto in_quotes;
81925
- } else if ((*buffer)[position_buffer] == options.delimiter[0]) {
82203
+ } else if (c == options.delimiter[0]) {
81926
82204
  // delimiter, add value
81927
82205
  offset = 1;
81928
82206
  goto add_value;
81929
- } else if (StringUtil::CharacterIsNewline((*buffer)[position_buffer])) {
82207
+ } else if (StringUtil::CharacterIsNewline(c)) {
81930
82208
  offset = 1;
81931
82209
  D_ASSERT(column == insert_chunk.ColumnCount() - 1);
81932
82210
  goto add_row;
@@ -81941,6 +82219,7 @@ unquote:
81941
82219
  options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
81942
82220
  return false;
81943
82221
  }
82222
+ }
81944
82223
  handle_escape : {
81945
82224
  /* state: handle_escape */
81946
82225
  // escape should be followed by a quote or another escape character
@@ -124848,7 +125127,7 @@ void SubstringDetection(string &str_1, string &str_2, const string &name_str_1,
124848
125127
  if (str_1.empty() || str_2.empty()) {
124849
125128
  return;
124850
125129
  }
124851
- if ((str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos) && str_1 != "NULL") {
125130
+ if ((str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos)) {
124852
125131
  throw BinderException("%s must not appear in the %s specification and vice versa", name_str_1, name_str_2);
124853
125132
  }
124854
125133
  }
@@ -124941,6 +125220,11 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, CopyInfo &in
124941
125220
  options.force_not_null.resize(expected_types.size(), false);
124942
125221
  }
124943
125222
  bind_data->FinalizeRead(context);
125223
+ if (!bind_data->single_threaded && options.auto_detect) {
125224
+ options.file_path = bind_data->files[0];
125225
+ auto initial_reader = make_unique<BufferedCSVReader>(context, options);
125226
+ options = initial_reader->options;
125227
+ }
124944
125228
  return move(bind_data);
124945
125229
  }
124946
125230
 
@@ -126083,11 +126367,6 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
126083
126367
  options.include_file_name = BooleanValue::Get(kv.second);
126084
126368
  } else if (loption == "hive_partitioning") {
126085
126369
  options.include_parsed_hive_partitions = BooleanValue::Get(kv.second);
126086
- } else if (loption == "buffer_size") {
126087
- options.buffer_size = kv.second.GetValue<uint64_t>();
126088
- if (options.buffer_size == 0) {
126089
- throw InvalidInputException("Buffer Size option must be higher than 0");
126090
- }
126091
126370
  } else {
126092
126371
  options.SetReadOption(loption, kv.second, names);
126093
126372
  }
@@ -126106,7 +126385,7 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
126106
126385
  } else {
126107
126386
  D_ASSERT(return_types.size() == names.size());
126108
126387
  }
126109
- options = result->options;
126388
+ options = initial_reader->options;
126110
126389
  result->sql_types = initial_reader->sql_types;
126111
126390
  result->initial_reader = move(initial_reader);
126112
126391
  } else {
@@ -126204,8 +126483,9 @@ static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFun
126204
126483
  //===--------------------------------------------------------------------===//
126205
126484
  struct ParallelCSVGlobalState : public GlobalTableFunctionState {
126206
126485
  public:
126207
- ParallelCSVGlobalState(unique_ptr<CSVFileHandle> file_handle_p, vector<string> &files_path_p,
126208
- idx_t system_threads_p, idx_t buffer_size_p, idx_t rows_to_skip)
126486
+ ParallelCSVGlobalState(ClientContext &context, unique_ptr<CSVFileHandle> file_handle_p,
126487
+ vector<string> &files_path_p, idx_t system_threads_p, idx_t buffer_size_p,
126488
+ idx_t rows_to_skip)
126209
126489
  : file_handle(move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p) {
126210
126490
  for (idx_t i = 0; i < rows_to_skip; i++) {
126211
126491
  file_handle->ReadLine();
@@ -126219,7 +126499,7 @@ public:
126219
126499
  } else {
126220
126500
  bytes_per_local_state = file_size / MaxThreads();
126221
126501
  }
126222
- current_buffer = make_shared<CSVBuffer>(buffer_size, *file_handle);
126502
+ current_buffer = make_shared<CSVBuffer>(context, buffer_size, *file_handle);
126223
126503
  next_buffer = current_buffer->Next(*file_handle, buffer_size);
126224
126504
  }
126225
126505
  ParallelCSVGlobalState() {
@@ -126306,7 +126586,7 @@ unique_ptr<CSVBufferRead> ParallelCSVGlobalState::Next(ClientContext &context, R
126306
126586
  if (file_index < bind_data.files.size()) {
126307
126587
  bind_data.options.file_path = bind_data.files[file_index++];
126308
126588
  file_handle = ReadCSV::OpenCSV(bind_data.options, context);
126309
- next_buffer = make_shared<CSVBuffer>(buffer_size, *file_handle);
126589
+ next_buffer = make_shared<CSVBuffer>(context, buffer_size, *file_handle);
126310
126590
  }
126311
126591
  }
126312
126592
  return result;
@@ -126327,8 +126607,9 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext
126327
126607
  file_handle = ReadCSV::OpenCSV(bind_data.options, context);
126328
126608
  }
126329
126609
  idx_t rows_to_skip = bind_data.options.skip_rows + (bind_data.options.has_header ? 1 : 0);
126330
- return make_unique<ParallelCSVGlobalState>(move(file_handle), bind_data.files, context.db->NumberOfThreads(),
126331
- bind_data.options.buffer_size, rows_to_skip);
126610
+ return make_unique<ParallelCSVGlobalState>(context, move(file_handle), bind_data.files,
126611
+ context.db->NumberOfThreads(), bind_data.options.buffer_size,
126612
+ rows_to_skip);
126332
126613
  }
126333
126614
 
126334
126615
  //===--------------------------------------------------------------------===//
@@ -138102,6 +138383,9 @@ void DatabaseInstance::Configure(DBConfig &new_config) {
138102
138383
  if (!config.error_manager) {
138103
138384
  config.error_manager = make_unique<ErrorManager>();
138104
138385
  }
138386
+ if (!config.default_allocator) {
138387
+ config.default_allocator = Allocator::DefaultAllocatorReference();
138388
+ }
138105
138389
  }
138106
138390
 
138107
138391
  DBConfig &DBConfig::GetConfig(ClientContext &context) {
@@ -202438,6 +202722,22 @@ string BufferManager::InMemoryWarning() {
202438
202722
  "\nOr set PRAGMA temp_directory='/path/to/tmp.tmp'";
202439
202723
  }
202440
202724
 
202725
+ void BufferManager::ReserveMemory(idx_t size) {
202726
+ if (size == 0) {
202727
+ return;
202728
+ }
202729
+ auto reservation =
202730
+ EvictBlocksOrThrow(size, maximum_memory, nullptr, "failed to reserve memory data of size %lld%s", size);
202731
+ reservation.size = 0;
202732
+ }
202733
+
202734
+ void BufferManager::FreeReservedMemory(idx_t size) {
202735
+ if (size == 0) {
202736
+ return;
202737
+ }
202738
+ current_memory -= size;
202739
+ }
202740
+
202441
202741
  //===--------------------------------------------------------------------===//
202442
202742
  // Buffer Allocator
202443
202743
  //===--------------------------------------------------------------------===//