duckdb 0.5.2-dev708.0 → 0.5.2-dev737.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -1388,11 +1388,6 @@ CopyFunctionCatalogEntry::CopyFunctionCatalogEntry(Catalog *catalog, SchemaCatal
1388
1388
 
1389
1389
 
1390
1390
 
1391
-
1392
-
1393
-
1394
-
1395
-
1396
1391
  //===----------------------------------------------------------------------===//
1397
1392
  // DuckDB
1398
1393
  //
@@ -1609,34 +1604,47 @@ inline void Radix::EncodeData(data_ptr_t dataptr, interval_t value) {
1609
1604
 
1610
1605
 
1611
1606
 
1607
+
1612
1608
  namespace duckdb {
1613
1609
 
1614
1610
  class Key {
1615
1611
  public:
1616
- Key(unique_ptr<data_t[]> data, idx_t len);
1617
-
1618
- explicit Key(idx_t len);
1612
+ Key();
1613
+ Key(data_ptr_t data, idx_t len);
1614
+ Key(ArenaAllocator &allocator, idx_t len);
1619
1615
 
1620
1616
  idx_t len;
1621
- unique_ptr<data_t[]> data;
1617
+ data_ptr_t data;
1622
1618
 
1623
1619
  public:
1624
1620
  template <class T>
1625
- static inline unique_ptr<Key> CreateKey(T element) {
1626
- auto data = Key::CreateData<T>(element);
1627
- return make_unique<Key>(move(data), sizeof(element));
1621
+ static inline Key CreateKey(ArenaAllocator &allocator, T element) {
1622
+ auto data = Key::CreateData<T>(allocator, element);
1623
+ return Key(data, sizeof(element));
1624
+ }
1625
+
1626
+ template <class T>
1627
+ static inline Key CreateKey(ArenaAllocator &allocator, const Value &element) {
1628
+ return CreateKey(allocator, element.GetValueUnsafe<T>());
1628
1629
  }
1629
1630
 
1630
1631
  template <class T>
1631
- static inline unique_ptr<Key> CreateKey(const Value &element) {
1632
- return CreateKey(element.GetValueUnsafe<T>());
1632
+ static inline void CreateKey(ArenaAllocator &allocator, Key &key, T element) {
1633
+ key.data = Key::CreateData<T>(allocator, element);
1634
+ key.len = sizeof(element);
1635
+ }
1636
+
1637
+ template <class T>
1638
+ static inline void CreateKey(ArenaAllocator &allocator, Key &key, const Value element) {
1639
+ key.data = Key::CreateData<T>(allocator, element.GetValueUnsafe<T>());
1640
+ key.len = sizeof(element);
1633
1641
  }
1634
1642
 
1635
1643
  public:
1636
- data_t &operator[](std::size_t i) {
1644
+ data_t &operator[](size_t i) {
1637
1645
  return data[i];
1638
1646
  }
1639
- const data_t &operator[](std::size_t i) const {
1647
+ const data_t &operator[](size_t i) const {
1640
1648
  return data[i];
1641
1649
  }
1642
1650
  bool operator>(const Key &k) const;
@@ -1645,23 +1653,39 @@ public:
1645
1653
  bool operator==(const Key &k) const;
1646
1654
 
1647
1655
  bool ByteMatches(Key &other, idx_t &depth);
1656
+ bool Empty();
1657
+ void ConcatenateKey(ArenaAllocator &allocator, Key &concat_key);
1648
1658
 
1649
1659
  private:
1650
1660
  template <class T>
1651
- static inline unique_ptr<data_t[]> CreateData(T value) {
1652
- auto data = unique_ptr<data_t[]>(new data_t[sizeof(value)]);
1653
- Radix::EncodeData<T>(data.get(), value);
1661
+ static inline data_ptr_t CreateData(ArenaAllocator &allocator, T value) {
1662
+ auto data = allocator.Allocate(sizeof(value));
1663
+ Radix::EncodeData<T>(data, value);
1654
1664
  return data;
1655
1665
  }
1656
1666
  };
1657
1667
 
1658
1668
  template <>
1659
- unique_ptr<Key> Key::CreateKey(string_t value);
1669
+ Key Key::CreateKey(ArenaAllocator &allocator, string_t value);
1670
+ template <>
1671
+ Key Key::CreateKey(ArenaAllocator &allocator, const char *value);
1660
1672
  template <>
1661
- unique_ptr<Key> Key::CreateKey(const char *value);
1673
+ void Key::CreateKey(ArenaAllocator &allocator, Key &key, string_t value);
1674
+ template <>
1675
+ void Key::CreateKey(ArenaAllocator &allocator, Key &key, const char *value);
1662
1676
 
1663
1677
  } // namespace duckdb
1664
1678
 
1679
+ //===----------------------------------------------------------------------===//
1680
+ // DuckDB
1681
+ //
1682
+ // duckdb/execution/index/art/iterator.hpp
1683
+ //
1684
+ //
1685
+ //===----------------------------------------------------------------------===//
1686
+
1687
+
1688
+
1665
1689
  //===----------------------------------------------------------------------===//
1666
1690
  // DuckDB
1667
1691
  //
@@ -1683,6 +1707,7 @@ unique_ptr<Key> Key::CreateKey(const char *value);
1683
1707
 
1684
1708
 
1685
1709
 
1710
+
1686
1711
  //===----------------------------------------------------------------------===//
1687
1712
  // DuckDB
1688
1713
  //
@@ -1693,7 +1718,6 @@ unique_ptr<Key> Key::CreateKey(const char *value);
1693
1718
 
1694
1719
 
1695
1720
 
1696
-
1697
1721
  //===----------------------------------------------------------------------===//
1698
1722
  // DuckDB
1699
1723
  //
@@ -1738,6 +1762,7 @@ private:
1738
1762
  } // namespace duckdb
1739
1763
 
1740
1764
 
1765
+
1741
1766
  namespace duckdb {
1742
1767
  class Prefix {
1743
1768
  public:
@@ -1786,7 +1811,6 @@ private:
1786
1811
 
1787
1812
 
1788
1813
 
1789
-
1790
1814
  namespace duckdb {
1791
1815
  enum class NodeType : uint8_t { NLeaf = 0, N4 = 1, N16 = 2, N48 = 3, N256 = 4 };
1792
1816
  class ART;
@@ -1870,16 +1894,18 @@ public:
1870
1894
  //! Create a new node of the specified type
1871
1895
  static void New(NodeType &type, Node *&node);
1872
1896
 
1897
+ //! Returns the string representation of a node
1898
+ string ToString(ART &art);
1873
1899
  //! Serialize this node
1874
1900
  BlockPointer Serialize(ART &art, duckdb::MetaBlockWriter &writer);
1901
+
1875
1902
  //! Deserialize this node
1876
1903
  static Node *Deserialize(ART &art, idx_t block_id, idx_t offset);
1877
-
1878
1904
  //! Merge r_node into l_node at the specified byte
1879
- static void MergeAtByte(MergeInfo &info, idx_t depth, idx_t &l_child_pos, idx_t &r_pos, uint8_t &key_byte,
1905
+ static bool MergeAtByte(MergeInfo &info, idx_t depth, idx_t &l_child_pos, idx_t &r_pos, uint8_t &key_byte,
1880
1906
  Node *&l_parent, idx_t l_pos);
1881
1907
  //! Merge two ART
1882
- static void MergeARTs(ART *l_art, ART *r_art);
1908
+ static bool MergeARTs(ART *l_art, ART *r_art);
1883
1909
 
1884
1910
  private:
1885
1911
  //! Serialize internal nodes
@@ -1912,8 +1938,10 @@ public:
1912
1938
  //! Remove a row_id from a leaf
1913
1939
  void Remove(row_t row_id);
1914
1940
 
1941
+ //! Returns the string representation of a leaf
1942
+ static string ToString(Node *node);
1915
1943
  //! Merge two NLeaf nodes
1916
- static void Merge(bool &has_constraint, Node *&l_node, Node *&r_node);
1944
+ static void Merge(Node *&l_node, Node *&r_node);
1917
1945
 
1918
1946
  //! Serialize a leaf
1919
1947
  BlockPointer Serialize(duckdb::MetaBlockWriter &writer);
@@ -1927,10 +1955,73 @@ private:
1927
1955
  } // namespace duckdb
1928
1956
 
1929
1957
 
1958
+
1959
+ namespace duckdb {
1960
+
1961
+ struct IteratorEntry {
1962
+ IteratorEntry() {
1963
+ }
1964
+ IteratorEntry(Node *node, idx_t pos) : node(node), pos(pos) {
1965
+ }
1966
+
1967
+ Node *node = nullptr;
1968
+ idx_t pos = 0;
1969
+ };
1970
+
1971
+ //! Keeps track of the current key in the iterator
1972
+ class IteratorCurrentKey {
1973
+ public:
1974
+ //! Push Byte
1975
+ void Push(uint8_t key);
1976
+ //! Pops n elements
1977
+ void Pop(idx_t n);
1978
+ //! Subscript operator
1979
+ uint8_t &operator[](idx_t idx);
1980
+ bool operator>(const Key &k) const;
1981
+ bool operator>=(const Key &k) const;
1982
+ bool operator==(const Key &k) const;
1983
+
1984
+ private:
1985
+ //! The current key position
1986
+ idx_t cur_key_pos = 0;
1987
+ //! The current key of the Leaf Node
1988
+ vector<uint8_t> key;
1989
+ };
1990
+
1991
+ class Iterator {
1992
+ public:
1993
+ //! Current Key
1994
+ IteratorCurrentKey cur_key;
1995
+ //! Pointer to the ART tree we are iterating
1996
+ ART *art = nullptr;
1997
+
1998
+ //! Scan the tree
1999
+ bool Scan(Key &bound, idx_t max_count, vector<row_t> &result_ids, bool is_inclusive);
2000
+ //! Finds minimum value of the tree
2001
+ void FindMinimum(Node &node);
2002
+ //! Goes to lower bound
2003
+ bool LowerBound(Node *node, Key &key, bool inclusive);
2004
+
2005
+ private:
2006
+ //! Stack of iterator entries
2007
+ stack<IteratorEntry> nodes;
2008
+ //! Last visited leaf
2009
+ Leaf *last_leaf = nullptr;
2010
+ //! Go to the next node
2011
+ bool Next();
2012
+ //! Push part of the key to cur_key
2013
+ void PushKey(Node *node, uint16_t pos);
2014
+ //! Pop node
2015
+ void PopNode();
2016
+ };
2017
+ } // namespace duckdb
2018
+
2019
+
2020
+
1930
2021
  //===----------------------------------------------------------------------===//
1931
2022
  // DuckDB
1932
2023
  //
1933
- // duckdb/execution/index/art/node4.hpp
2024
+ // duckdb/execution/index/art/node16.hpp
1934
2025
  //
1935
2026
  //
1936
2027
  //===----------------------------------------------------------------------===//
@@ -1977,14 +2068,14 @@ public:
1977
2068
 
1978
2069
  namespace duckdb {
1979
2070
 
1980
- class Node4 : public Node {
2071
+ class Node16 : public Node {
1981
2072
  public:
1982
- Node4();
1983
- uint8_t key[4];
1984
- SwizzleablePointer children[4];
2073
+ explicit Node16();
2074
+ uint8_t key[16];
2075
+ SwizzleablePointer children[16];
1985
2076
 
1986
2077
  public:
1987
- //! Get position of a byte, returns DConstants::INVALID_INDEX if not exists
2078
+ //! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
1988
2079
  idx_t GetChildPos(uint8_t k) override;
1989
2080
  //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
1990
2081
  //! if there are no children matching the criteria
@@ -1993,18 +2084,18 @@ public:
1993
2084
  idx_t GetMin() override;
1994
2085
  //! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position
1995
2086
  idx_t GetNextPos(idx_t pos) override;
1996
- //! Get Node4 child
2087
+ //! Get Node16 child
1997
2088
  Node *GetChild(ART &art, idx_t pos) override;
1998
2089
  //! Replace child pointer
1999
2090
  void ReplaceChildPointer(idx_t pos, Node *node) override;
2000
2091
 
2001
- //! Insert a new child node at key_byte into the Node4
2092
+ //! Insert a new child node at key_byte into the Node16
2002
2093
  static void InsertChild(Node *&node, uint8_t key_byte, Node *new_child);
2003
- //! Erase the child at pos and (if necessary) merge with last child
2094
+ //! Erase the child at pos and (if necessary) shrink to Node4
2004
2095
  static void EraseChild(Node *&node, int pos, ART &art);
2005
- //! Merge Node4 into l_node
2006
- static void Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
2007
- //! Returns the size (maximum capacity) of the Node4
2096
+ //! Merge Node16 into l_node
2097
+ static bool Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
2098
+ //! Returns the size (maximum capacity) of the Node16
2008
2099
  static idx_t GetSize();
2009
2100
  };
2010
2101
  } // namespace duckdb
@@ -2012,7 +2103,7 @@ public:
2012
2103
  //===----------------------------------------------------------------------===//
2013
2104
  // DuckDB
2014
2105
  //
2015
- // duckdb/execution/index/art/node16.hpp
2106
+ // duckdb/execution/index/art/node256.hpp
2016
2107
  //
2017
2108
  //
2018
2109
  //===----------------------------------------------------------------------===//
@@ -2023,11 +2114,10 @@ public:
2023
2114
 
2024
2115
  namespace duckdb {
2025
2116
 
2026
- class Node16 : public Node {
2117
+ class Node256 : public Node {
2027
2118
  public:
2028
- explicit Node16();
2029
- uint8_t key[16];
2030
- SwizzleablePointer children[16];
2119
+ explicit Node256();
2120
+ SwizzleablePointer children[256];
2031
2121
 
2032
2122
  public:
2033
2123
  //! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
@@ -2039,18 +2129,18 @@ public:
2039
2129
  idx_t GetMin() override;
2040
2130
  //! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position
2041
2131
  idx_t GetNextPos(idx_t pos) override;
2042
- //! Get Node16 child
2132
+ //! Get Node256 child
2043
2133
  Node *GetChild(ART &art, idx_t pos) override;
2044
2134
  //! Replace child pointer
2045
2135
  void ReplaceChildPointer(idx_t pos, Node *node) override;
2046
2136
 
2047
- //! Insert a new child node at key_byte into the Node16
2137
+ //! Insert a new child node at key_byte into the Node256
2048
2138
  static void InsertChild(Node *&node, uint8_t key_byte, Node *new_child);
2049
- //! Erase the child at pos and (if necessary) shrink to Node4
2139
+ //! Erase the child at pos and (if necessary) shrink to Node48
2050
2140
  static void EraseChild(Node *&node, int pos, ART &art);
2051
- //! Merge Node16 into l_node
2052
- static void Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
2053
- //! Returns the size (maximum capacity) of the Node16
2141
+ //! Merge Node256 into l_node
2142
+ static bool Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
2143
+ //! Returns the size (maximum capacity) of the Node256
2054
2144
  static idx_t GetSize();
2055
2145
  };
2056
2146
  } // namespace duckdb
@@ -2058,7 +2148,7 @@ public:
2058
2148
  //===----------------------------------------------------------------------===//
2059
2149
  // DuckDB
2060
2150
  //
2061
- // duckdb/execution/index/art/node48.hpp
2151
+ // duckdb/execution/index/art/node4.hpp
2062
2152
  //
2063
2153
  //
2064
2154
  //===----------------------------------------------------------------------===//
@@ -2069,14 +2159,14 @@ public:
2069
2159
 
2070
2160
  namespace duckdb {
2071
2161
 
2072
- class Node48 : public Node {
2162
+ class Node4 : public Node {
2073
2163
  public:
2074
- explicit Node48();
2075
- uint8_t child_index[256];
2076
- SwizzleablePointer children[48];
2164
+ Node4();
2165
+ uint8_t key[4];
2166
+ SwizzleablePointer children[4];
2077
2167
 
2078
2168
  public:
2079
- //! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
2169
+ //! Get position of a byte, returns DConstants::INVALID_INDEX if not exists
2080
2170
  idx_t GetChildPos(uint8_t k) override;
2081
2171
  //! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
2082
2172
  //! if there are no children matching the criteria
@@ -2085,18 +2175,18 @@ public:
2085
2175
  idx_t GetMin() override;
2086
2176
  //! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position
2087
2177
  idx_t GetNextPos(idx_t pos) override;
2088
- //! Get Node48 child
2178
+ //! Get Node4 child
2089
2179
  Node *GetChild(ART &art, idx_t pos) override;
2090
2180
  //! Replace child pointer
2091
2181
  void ReplaceChildPointer(idx_t pos, Node *node) override;
2092
2182
 
2093
- //! Insert a new child node at key_byte into the Node48
2183
+ //! Insert a new child node at key_byte into the Node4
2094
2184
  static void InsertChild(Node *&node, uint8_t key_byte, Node *new_child);
2095
- //! Erase the child at pos and (if necessary) shrink to Node16
2185
+ //! Erase the child at pos and (if necessary) merge with last child
2096
2186
  static void EraseChild(Node *&node, int pos, ART &art);
2097
- //! Merge Node48 into l_node
2098
- static void Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
2099
- //! Returns the size (maximum capacity) of the Node48
2187
+ //! Merge Node4 into l_node
2188
+ static bool Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
2189
+ //! Returns the size (maximum capacity) of the Node4
2100
2190
  static idx_t GetSize();
2101
2191
  };
2102
2192
  } // namespace duckdb
@@ -2104,7 +2194,7 @@ public:
2104
2194
  //===----------------------------------------------------------------------===//
2105
2195
  // DuckDB
2106
2196
  //
2107
- // duckdb/execution/index/art/node256.hpp
2197
+ // duckdb/execution/index/art/node48.hpp
2108
2198
  //
2109
2199
  //
2110
2200
  //===----------------------------------------------------------------------===//
@@ -2115,10 +2205,11 @@ public:
2115
2205
 
2116
2206
  namespace duckdb {
2117
2207
 
2118
- class Node256 : public Node {
2208
+ class Node48 : public Node {
2119
2209
  public:
2120
- explicit Node256();
2121
- SwizzleablePointer children[256];
2210
+ explicit Node48();
2211
+ uint8_t child_index[256];
2212
+ SwizzleablePointer children[48];
2122
2213
 
2123
2214
  public:
2124
2215
  //! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
@@ -2130,94 +2221,25 @@ public:
2130
2221
  idx_t GetMin() override;
2131
2222
  //! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position
2132
2223
  idx_t GetNextPos(idx_t pos) override;
2133
- //! Get Node256 child
2224
+ //! Get Node48 child
2134
2225
  Node *GetChild(ART &art, idx_t pos) override;
2135
2226
  //! Replace child pointer
2136
2227
  void ReplaceChildPointer(idx_t pos, Node *node) override;
2137
2228
 
2138
- //! Insert a new child node at key_byte into the Node256
2229
+ //! Insert a new child node at key_byte into the Node48
2139
2230
  static void InsertChild(Node *&node, uint8_t key_byte, Node *new_child);
2140
- //! Erase the child at pos and (if necessary) shrink to Node48
2231
+ //! Erase the child at pos and (if necessary) shrink to Node16
2141
2232
  static void EraseChild(Node *&node, int pos, ART &art);
2142
- //! Merge Node256 into l_node
2143
- static void Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
2144
- //! Returns the size (maximum capacity) of the Node256
2233
+ //! Merge Node48 into l_node
2234
+ static bool Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
2235
+ //! Returns the size (maximum capacity) of the Node48
2145
2236
  static idx_t GetSize();
2146
2237
  };
2147
2238
  } // namespace duckdb
2148
2239
 
2149
- //===----------------------------------------------------------------------===//
2150
- // DuckDB
2151
- //
2152
- // duckdb/execution/index/art/iterator.hpp
2153
- //
2154
- //
2155
- //===----------------------------------------------------------------------===//
2156
-
2157
-
2158
-
2159
-
2160
-
2161
-
2162
- namespace duckdb {
2163
-
2164
- struct IteratorEntry {
2165
- IteratorEntry() {
2166
- }
2167
- IteratorEntry(Node *node, idx_t pos) : node(node), pos(pos) {
2168
- }
2169
-
2170
- Node *node = nullptr;
2171
- idx_t pos = 0;
2172
- };
2173
2240
 
2174
- //! Keeps track of the current key in the iterator
2175
- class IteratorCurrentKey {
2176
- public:
2177
- //! Push Byte
2178
- void Push(uint8_t key);
2179
- //! Pops n elements
2180
- void Pop(idx_t n);
2181
- //! Subscript operator
2182
- uint8_t &operator[](idx_t idx);
2183
- bool operator>(const Key &k) const;
2184
- bool operator>=(const Key &k) const;
2185
- bool operator==(const Key &k) const;
2186
-
2187
- private:
2188
- //! The current key position
2189
- idx_t cur_key_pos = 0;
2190
- //! The current key of the Leaf Node
2191
- vector<uint8_t> key;
2192
- };
2193
-
2194
- class Iterator {
2195
- public:
2196
- //! Current Key
2197
- IteratorCurrentKey cur_key;
2198
- //! Pointer to the ART tree we are iterating
2199
- ART *art = nullptr;
2200
2241
 
2201
- //! Scan the tree
2202
- bool Scan(Key *bound, idx_t max_count, vector<row_t> &result_ids, bool is_inclusive);
2203
- //! Finds minimum value of the tree
2204
- void FindMinimum(Node &node);
2205
- //! Goes to lower bound
2206
- bool LowerBound(Node *node, Key &key, bool inclusive);
2207
2242
 
2208
- private:
2209
- //! Stack of iterator entries
2210
- stack<IteratorEntry> nodes;
2211
- //! Last visited leaf
2212
- Leaf *last_leaf = nullptr;
2213
- //! Go to the next node
2214
- bool Next();
2215
- //! Push part of the key to cur_key
2216
- void PushKey(Node *node, uint16_t pos);
2217
- //! Pop node
2218
- void PopNode();
2219
- };
2220
- } // namespace duckdb
2221
2243
 
2222
2244
 
2223
2245
  namespace duckdb {
@@ -2287,33 +2309,37 @@ public:
2287
2309
  //! Construct ARTs from sorted chunks and merge them.
2288
2310
  void ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator &allocator) override;
2289
2311
 
2290
- bool SearchEqual(ARTIndexScanState *state, idx_t max_count, vector<row_t> &result_ids);
2312
+ //! Search Equal and fetches the row IDs
2313
+ bool SearchEqual(Key &key, idx_t max_count, vector<row_t> &result_ids);
2291
2314
  //! Search Equal used for Joins that do not need to fetch data
2292
- void SearchEqualJoinNoFetch(Value &equal_value, idx_t &result_size);
2315
+ void SearchEqualJoinNoFetch(Key &key, idx_t &result_size);
2293
2316
  //! Serialized the ART
2294
2317
  BlockPointer Serialize(duckdb::MetaBlockWriter &writer) override;
2295
2318
 
2296
2319
  //! Merge two ARTs
2297
- static void Merge(ART *l_art, ART *r_art);
2320
+ bool MergeIndexes(IndexLock &state, Index *other_index) override;
2321
+ //! Generate ART keys for an input chunk
2322
+ static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<Key> &keys);
2323
+ //! Returns the string representation of an ART
2324
+ string ToString() override;
2298
2325
 
2299
2326
  private:
2300
2327
  //! Insert a row id into a leaf node
2301
2328
  bool InsertToLeaf(Leaf &leaf, row_t row_id);
2302
2329
  //! Insert the leaf value into the tree
2303
- bool Insert(Node *&node, unique_ptr<Key> key, unsigned depth, row_t row_id);
2330
+ bool Insert(Node *&node, Key &key, idx_t depth, row_t row_id);
2304
2331
 
2305
2332
  //! Erase element from leaf (if leaf has more than one value) or eliminate the leaf itself
2306
- void Erase(Node *&node, Key &key, unsigned depth, row_t row_id);
2333
+ void Erase(Node *&node, Key &key, idx_t depth, row_t row_id);
2307
2334
 
2308
2335
  //! Find the node with a matching key, optimistic version
2309
- Node *Lookup(Node *node, Key &key, unsigned depth);
2310
-
2311
- bool SearchGreater(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector<row_t> &result_ids);
2312
- bool SearchLess(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector<row_t> &result_ids);
2313
- bool SearchCloseRange(ARTIndexScanState *state, bool left_inclusive, bool right_inclusive, idx_t max_count,
2314
- vector<row_t> &result_ids);
2336
+ Leaf *Lookup(Node *node, Key &key, idx_t depth);
2315
2337
 
2316
- void GenerateKeys(DataChunk &input, vector<unique_ptr<Key>> &keys);
2338
+ bool SearchGreater(ARTIndexScanState *state, Key &key, bool inclusive, idx_t max_count, vector<row_t> &result_ids);
2339
+ bool SearchLess(ARTIndexScanState *state, Key &upper_bound, bool inclusive, idx_t max_count,
2340
+ vector<row_t> &result_ids);
2341
+ bool SearchCloseRange(ARTIndexScanState *state, Key &lower_bound, Key &upper_bound, bool left_inclusive,
2342
+ bool right_inclusive, idx_t max_count, vector<row_t> &result_ids);
2317
2343
 
2318
2344
  void VerifyExistence(DataChunk &chunk, VerifyExistenceType verify_type, string *err_msg_ptr = nullptr);
2319
2345
  };
@@ -33323,7 +33349,7 @@ void RowOperations::SwizzleColumns(const RowLayout &layout, const data_ptr_t bas
33323
33349
  }
33324
33350
  data_ptr_t col_ptr = row_ptr + layout.GetOffsets()[col_idx];
33325
33351
  if (physical_type == PhysicalType::VARCHAR) {
33326
- data_ptr_t string_ptr = col_ptr + sizeof(uint32_t) + string_t::PREFIX_LENGTH;
33352
+ data_ptr_t string_ptr = col_ptr + string_t::HEADER_SIZE;
33327
33353
  for (idx_t i = 0; i < next; i++) {
33328
33354
  if (Load<uint32_t>(col_ptr) > string_t::INLINE_LENGTH) {
33329
33355
  // Overwrite the string pointer with the within-row offset (if not inlined)
@@ -33427,7 +33453,7 @@ void RowOperations::UnswizzlePointers(const RowLayout &layout, const data_ptr_t
33427
33453
  }
33428
33454
  data_ptr_t col_ptr = row_ptr + layout.GetOffsets()[col_idx];
33429
33455
  if (physical_type == PhysicalType::VARCHAR) {
33430
- data_ptr_t string_ptr = col_ptr + sizeof(uint32_t) + string_t::PREFIX_LENGTH;
33456
+ data_ptr_t string_ptr = col_ptr + string_t::HEADER_SIZE;
33431
33457
  for (idx_t i = 0; i < next; i++) {
33432
33458
  if (Load<uint32_t>(col_ptr) > string_t::INLINE_LENGTH) {
33433
33459
  // Overwrite the string offset with the pointer (if not inlined)
@@ -33584,7 +33610,7 @@ static void GatherVarchar(Vector &rows, const SelectionVector &row_sel, Vector &
33584
33610
  // Not inline, so unswizzle the copied pointer the pointer
33585
33611
  auto heap_ptr_ptr = row + heap_offset;
33586
33612
  auto heap_row_ptr = base_heap_ptr + Load<idx_t>(heap_ptr_ptr);
33587
- auto string_ptr = data_ptr_t(data + col_idx) + sizeof(uint32_t) + string_t::PREFIX_LENGTH;
33613
+ auto string_ptr = data_ptr_t(data + col_idx) + string_t::HEADER_SIZE;
33588
33614
  Store<data_ptr_t>(heap_row_ptr + Load<idx_t>(string_ptr), string_ptr);
33589
33615
  #ifdef DEBUG
33590
33616
  data[col_idx].Verify();
@@ -35753,14 +35779,14 @@ int Comparators::TemplatedCompareListLoop(data_ptr_t &left_ptr, data_ptr_t &righ
35753
35779
 
35754
35780
  void Comparators::UnswizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type) {
35755
35781
  if (type.InternalType() == PhysicalType::VARCHAR) {
35756
- data_ptr += sizeof(uint32_t) + string_t::PREFIX_LENGTH;
35782
+ data_ptr += string_t::HEADER_SIZE;
35757
35783
  }
35758
35784
  Store<data_ptr_t>(heap_ptr + Load<idx_t>(data_ptr), data_ptr);
35759
35785
  }
35760
35786
 
35761
35787
  void Comparators::SwizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type) {
35762
35788
  if (type.InternalType() == PhysicalType::VARCHAR) {
35763
- data_ptr += sizeof(uint32_t) + string_t::PREFIX_LENGTH;
35789
+ data_ptr += string_t::HEADER_SIZE;
35764
35790
  }
35765
35791
  Store<idx_t>(Load<data_ptr_t>(data_ptr) - heap_ptr, data_ptr);
35766
35792
  }
@@ -58262,6 +58288,7 @@ ExpressionExecutorState::ExpressionExecutorState(const string &name) : profiler(
58262
58288
 
58263
58289
 
58264
58290
 
58291
+
58265
58292
  #include <algorithm>
58266
58293
  #include <cstring>
58267
58294
  #include <ctgmath>
@@ -58326,90 +58353,87 @@ unique_ptr<IndexScanState> ART::InitializeScanTwoPredicates(Transaction &transac
58326
58353
  }
58327
58354
 
58328
58355
  //===--------------------------------------------------------------------===//
58329
- // Insert
58356
+ // Keys
58330
58357
  //===--------------------------------------------------------------------===//
58358
+
58331
58359
  template <class T>
58332
- static void TemplatedGenerateKeys(Vector &input, idx_t count, vector<unique_ptr<Key>> &keys) {
58360
+ static void TemplatedGenerateKeys(ArenaAllocator &allocator, Vector &input, idx_t count, vector<Key> &keys) {
58333
58361
  UnifiedVectorFormat idata;
58334
58362
  input.ToUnifiedFormat(count, idata);
58335
58363
 
58364
+ D_ASSERT(keys.size() >= count);
58336
58365
  auto input_data = (T *)idata.data;
58337
58366
  for (idx_t i = 0; i < count; i++) {
58338
58367
  auto idx = idata.sel->get_index(i);
58339
58368
  if (idata.validity.RowIsValid(idx)) {
58340
- keys.push_back(Key::CreateKey<T>(input_data[idx]));
58341
- } else {
58342
- keys.push_back(nullptr);
58369
+ Key::CreateKey<T>(allocator, keys[i], input_data[idx]);
58343
58370
  }
58344
58371
  }
58345
58372
  }
58346
58373
 
58347
58374
  template <class T>
58348
- static void ConcatenateKeys(Vector &input, idx_t count, vector<unique_ptr<Key>> &keys) {
58375
+ static void ConcatenateKeys(ArenaAllocator &allocator, Vector &input, idx_t count, vector<Key> &keys) {
58349
58376
  UnifiedVectorFormat idata;
58350
58377
  input.ToUnifiedFormat(count, idata);
58351
58378
 
58352
58379
  auto input_data = (T *)idata.data;
58353
58380
  for (idx_t i = 0; i < count; i++) {
58354
58381
  auto idx = idata.sel->get_index(i);
58355
- if (!idata.validity.RowIsValid(idx) || !keys[i]) {
58356
- // either this column is NULL, or the previous column is NULL!
58357
- keys[i] = nullptr;
58358
- } else {
58359
- // concatenate the keys
58360
- auto old_key = move(keys[i]);
58361
- auto new_key = Key::CreateKey<T>(input_data[idx]);
58362
- auto key_len = old_key->len + new_key->len;
58363
- auto compound_data = unique_ptr<data_t[]>(new data_t[key_len]);
58364
- memcpy(compound_data.get(), old_key->data.get(), old_key->len);
58365
- memcpy(compound_data.get() + old_key->len, new_key->data.get(), new_key->len);
58366
- keys[i] = make_unique<Key>(move(compound_data), key_len);
58382
+
58383
+ // key is not NULL (no previous column entry was NULL)
58384
+ if (!keys[i].Empty()) {
58385
+ if (!idata.validity.RowIsValid(idx)) {
58386
+ // this column entry is NULL, set whole key to NULL
58387
+ keys[i] = Key();
58388
+ } else {
58389
+ auto other_key = Key::CreateKey<T>(allocator, input_data[idx]);
58390
+ keys[i].ConcatenateKey(allocator, other_key);
58391
+ }
58367
58392
  }
58368
58393
  }
58369
58394
  }
58370
58395
 
58371
- void ART::GenerateKeys(DataChunk &input, vector<unique_ptr<Key>> &keys) {
58372
- keys.reserve(STANDARD_VECTOR_SIZE);
58396
+ void ART::GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<Key> &keys) {
58373
58397
  // generate keys for the first input column
58374
58398
  switch (input.data[0].GetType().InternalType()) {
58375
58399
  case PhysicalType::BOOL:
58376
- TemplatedGenerateKeys<bool>(input.data[0], input.size(), keys);
58400
+ TemplatedGenerateKeys<bool>(allocator, input.data[0], input.size(), keys);
58377
58401
  break;
58378
58402
  case PhysicalType::INT8:
58379
- TemplatedGenerateKeys<int8_t>(input.data[0], input.size(), keys);
58403
+ TemplatedGenerateKeys<int8_t>(allocator, input.data[0], input.size(), keys);
58380
58404
  break;
58381
58405
  case PhysicalType::INT16:
58382
- TemplatedGenerateKeys<int16_t>(input.data[0], input.size(), keys);
58406
+ TemplatedGenerateKeys<int16_t>(allocator, input.data[0], input.size(), keys);
58383
58407
  break;
58384
58408
  case PhysicalType::INT32:
58385
- TemplatedGenerateKeys<int32_t>(input.data[0], input.size(), keys);
58409
+ TemplatedGenerateKeys<int32_t>(allocator, input.data[0], input.size(), keys);
58386
58410
  break;
58387
58411
  case PhysicalType::INT64:
58388
- TemplatedGenerateKeys<int64_t>(input.data[0], input.size(), keys);
58412
+ TemplatedGenerateKeys<int64_t>(allocator, input.data[0], input.size(), keys);
58389
58413
  break;
58390
58414
  case PhysicalType::INT128:
58391
- TemplatedGenerateKeys<hugeint_t>(input.data[0], input.size(), keys);
58415
+ TemplatedGenerateKeys<hugeint_t>(allocator, input.data[0], input.size(), keys);
58392
58416
  break;
58393
58417
  case PhysicalType::UINT8:
58394
- TemplatedGenerateKeys<uint8_t>(input.data[0], input.size(), keys);
58418
+ TemplatedGenerateKeys<uint8_t>(allocator, input.data[0], input.size(), keys);
58395
58419
  break;
58396
58420
  case PhysicalType::UINT16:
58397
- TemplatedGenerateKeys<uint16_t>(input.data[0], input.size(), keys);
58421
+ TemplatedGenerateKeys<uint16_t>(allocator, input.data[0], input.size(), keys);
58398
58422
  break;
58399
58423
  case PhysicalType::UINT32:
58400
- TemplatedGenerateKeys<uint32_t>(input.data[0], input.size(), keys);
58424
+ TemplatedGenerateKeys<uint32_t>(allocator, input.data[0], input.size(), keys);
58401
58425
  break;
58402
58426
  case PhysicalType::UINT64:
58403
- TemplatedGenerateKeys<uint64_t>(input.data[0], input.size(), keys);
58427
+ TemplatedGenerateKeys<uint64_t>(allocator, input.data[0], input.size(), keys);
58404
58428
  break;
58405
58429
  case PhysicalType::FLOAT:
58406
- TemplatedGenerateKeys<float>(input.data[0], input.size(), keys);
58430
+ TemplatedGenerateKeys<float>(allocator, input.data[0], input.size(), keys);
58407
58431
  break;
58408
58432
  case PhysicalType::DOUBLE:
58409
- TemplatedGenerateKeys<double>(input.data[0], input.size(), keys);
58433
+ TemplatedGenerateKeys<double>(allocator, input.data[0], input.size(), keys);
58410
58434
  break;
58411
58435
  case PhysicalType::VARCHAR:
58412
- TemplatedGenerateKeys<string_t>(input.data[0], input.size(), keys);
58436
+ TemplatedGenerateKeys<string_t>(allocator, input.data[0], input.size(), keys);
58413
58437
  break;
58414
58438
  default:
58415
58439
  throw InternalException("Invalid type for index");
@@ -58419,43 +58443,43 @@ void ART::GenerateKeys(DataChunk &input, vector<unique_ptr<Key>> &keys) {
58419
58443
  // for each of the remaining columns, concatenate
58420
58444
  switch (input.data[i].GetType().InternalType()) {
58421
58445
  case PhysicalType::BOOL:
58422
- ConcatenateKeys<bool>(input.data[i], input.size(), keys);
58446
+ ConcatenateKeys<bool>(allocator, input.data[i], input.size(), keys);
58423
58447
  break;
58424
58448
  case PhysicalType::INT8:
58425
- ConcatenateKeys<int8_t>(input.data[i], input.size(), keys);
58449
+ ConcatenateKeys<int8_t>(allocator, input.data[i], input.size(), keys);
58426
58450
  break;
58427
58451
  case PhysicalType::INT16:
58428
- ConcatenateKeys<int16_t>(input.data[i], input.size(), keys);
58452
+ ConcatenateKeys<int16_t>(allocator, input.data[i], input.size(), keys);
58429
58453
  break;
58430
58454
  case PhysicalType::INT32:
58431
- ConcatenateKeys<int32_t>(input.data[i], input.size(), keys);
58455
+ ConcatenateKeys<int32_t>(allocator, input.data[i], input.size(), keys);
58432
58456
  break;
58433
58457
  case PhysicalType::INT64:
58434
- ConcatenateKeys<int64_t>(input.data[i], input.size(), keys);
58458
+ ConcatenateKeys<int64_t>(allocator, input.data[i], input.size(), keys);
58435
58459
  break;
58436
58460
  case PhysicalType::INT128:
58437
- ConcatenateKeys<hugeint_t>(input.data[i], input.size(), keys);
58461
+ ConcatenateKeys<hugeint_t>(allocator, input.data[i], input.size(), keys);
58438
58462
  break;
58439
58463
  case PhysicalType::UINT8:
58440
- ConcatenateKeys<uint8_t>(input.data[i], input.size(), keys);
58464
+ ConcatenateKeys<uint8_t>(allocator, input.data[i], input.size(), keys);
58441
58465
  break;
58442
58466
  case PhysicalType::UINT16:
58443
- ConcatenateKeys<uint16_t>(input.data[i], input.size(), keys);
58467
+ ConcatenateKeys<uint16_t>(allocator, input.data[i], input.size(), keys);
58444
58468
  break;
58445
58469
  case PhysicalType::UINT32:
58446
- ConcatenateKeys<uint32_t>(input.data[i], input.size(), keys);
58470
+ ConcatenateKeys<uint32_t>(allocator, input.data[i], input.size(), keys);
58447
58471
  break;
58448
58472
  case PhysicalType::UINT64:
58449
- ConcatenateKeys<uint64_t>(input.data[i], input.size(), keys);
58473
+ ConcatenateKeys<uint64_t>(allocator, input.data[i], input.size(), keys);
58450
58474
  break;
58451
58475
  case PhysicalType::FLOAT:
58452
- ConcatenateKeys<float>(input.data[i], input.size(), keys);
58476
+ ConcatenateKeys<float>(allocator, input.data[i], input.size(), keys);
58453
58477
  break;
58454
58478
  case PhysicalType::DOUBLE:
58455
- ConcatenateKeys<double>(input.data[i], input.size(), keys);
58479
+ ConcatenateKeys<double>(allocator, input.data[i], input.size(), keys);
58456
58480
  break;
58457
58481
  case PhysicalType::VARCHAR:
58458
- ConcatenateKeys<string_t>(input.data[i], input.size(), keys);
58482
+ ConcatenateKeys<string_t>(allocator, input.data[i], input.size(), keys);
58459
58483
  break;
58460
58484
  default:
58461
58485
  throw InternalException("Invalid type for index");
@@ -58463,22 +58487,26 @@ void ART::GenerateKeys(DataChunk &input, vector<unique_ptr<Key>> &keys) {
58463
58487
  }
58464
58488
  }
58465
58489
 
58490
+ //===--------------------------------------------------------------------===//
58491
+ // Insert
58492
+ //===--------------------------------------------------------------------===//
58493
+
58466
58494
  struct KeySection {
58467
58495
  KeySection(idx_t start_p, idx_t end_p, idx_t depth_p, data_t key_byte_p)
58468
58496
  : start(start_p), end(end_p), depth(depth_p), key_byte(key_byte_p) {};
58469
- KeySection(idx_t start_p, idx_t end_p, vector<unique_ptr<Key>> &keys, KeySection &key_section)
58470
- : start(start_p), end(end_p), depth(key_section.depth + 1), key_byte(keys[end_p]->data[key_section.depth]) {};
58497
+ KeySection(idx_t start_p, idx_t end_p, vector<Key> &keys, KeySection &key_section)
58498
+ : start(start_p), end(end_p), depth(key_section.depth + 1), key_byte(keys[end_p].data[key_section.depth]) {};
58471
58499
  idx_t start;
58472
58500
  idx_t end;
58473
58501
  idx_t depth;
58474
58502
  data_t key_byte;
58475
58503
  };
58476
58504
 
58477
- void GetChildSections(vector<KeySection> &child_sections, vector<unique_ptr<Key>> &keys, KeySection &key_section) {
58505
+ void GetChildSections(vector<KeySection> &child_sections, vector<Key> &keys, KeySection &key_section) {
58478
58506
 
58479
58507
  idx_t child_start_idx = key_section.start;
58480
58508
  for (idx_t i = key_section.start + 1; i <= key_section.end; i++) {
58481
- if (keys[i - 1]->data[key_section.depth] != keys[i]->data[key_section.depth]) {
58509
+ if (keys[i - 1].data[key_section.depth] != keys[i].data[key_section.depth]) {
58482
58510
  child_sections.emplace_back(child_start_idx, i - 1, keys, key_section);
58483
58511
  child_start_idx = i;
58484
58512
  }
@@ -58486,15 +58514,14 @@ void GetChildSections(vector<KeySection> &child_sections, vector<unique_ptr<Key>
58486
58514
  child_sections.emplace_back(child_start_idx, key_section.end, keys, key_section);
58487
58515
  }
58488
58516
 
58489
- void Construct(vector<unique_ptr<Key>> &keys, row_t *row_ids, Node *&node, KeySection &key_section,
58490
- bool &has_constraint) {
58517
+ void Construct(vector<Key> &keys, row_t *row_ids, Node *&node, KeySection &key_section, bool &has_constraint) {
58491
58518
 
58492
58519
  D_ASSERT(key_section.start < keys.size());
58493
58520
  D_ASSERT(key_section.end < keys.size());
58494
58521
  D_ASSERT(key_section.start <= key_section.end);
58495
58522
 
58496
- auto &start_key = *keys[key_section.start];
58497
- auto &end_key = *keys[key_section.end];
58523
+ auto &start_key = keys[key_section.start];
58524
+ auto &end_key = keys[key_section.end];
58498
58525
 
58499
58526
  // increment the depth until we reach a leaf or find a mismatching byte
58500
58527
  auto prefix_start = key_section.depth;
@@ -58542,11 +58569,11 @@ void Construct(vector<unique_ptr<Key>> &keys, row_t *row_ids, Node *&node, KeySe
58542
58569
  }
58543
58570
  }
58544
58571
 
58545
- void FindFirstNotNullKey(vector<unique_ptr<Key>> &keys, bool &skipped_all_nulls, idx_t &start_idx) {
58572
+ void FindFirstNotNullKey(vector<Key> &keys, bool &skipped_all_nulls, idx_t &start_idx) {
58546
58573
 
58547
58574
  if (!skipped_all_nulls) {
58548
58575
  for (idx_t i = 0; i < keys.size(); i++) {
58549
- if (keys[i]) {
58576
+ if (!keys[i].Empty()) {
58550
58577
  start_idx = i;
58551
58578
  skipped_all_nulls = true;
58552
58579
  return;
@@ -58560,9 +58587,13 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
58560
58587
  auto payload_types = logical_types;
58561
58588
  payload_types.emplace_back(LogicalType::ROW_TYPE);
58562
58589
 
58590
+ ArenaAllocator arena_allocator(allocator);
58591
+ vector<Key> keys(STANDARD_VECTOR_SIZE);
58592
+
58563
58593
  auto skipped_all_nulls = false;
58564
58594
  auto temp_art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
58565
58595
  this->constraint_type, this->db);
58596
+
58566
58597
  for (;;) {
58567
58598
  DataChunk ordered_chunk;
58568
58599
  ordered_chunk.Initialize(allocator, payload_types);
@@ -58581,8 +58612,8 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
58581
58612
  D_ASSERT(logical_types[0] == ordered_chunk.data[0].GetType());
58582
58613
 
58583
58614
  // generate the keys for the given input
58584
- vector<unique_ptr<Key>> keys;
58585
- GenerateKeys(ordered_chunk, keys);
58615
+ arena_allocator.Reset();
58616
+ GenerateKeys(arena_allocator, ordered_chunk, keys);
58586
58617
 
58587
58618
  // we order NULLS FIRST, so we might have to skip nulls at the start of our sorted data
58588
58619
  idx_t start_idx = 0;
@@ -58608,16 +58639,21 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
58608
58639
  auto art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
58609
58640
  this->constraint_type, this->db);
58610
58641
  auto key_section = KeySection(start_idx, ordered_chunk.size() - 1, 0, 0);
58611
- auto has_constraint = IsPrimary() || IsUnique();
58642
+ auto has_constraint = IsUnique();
58612
58643
  Construct(keys, row_ids, art->tree, key_section, has_constraint);
58613
58644
 
58614
58645
  // merge art into temp_art
58615
- ART::Merge(temp_art.get(), art.get());
58646
+ if (!temp_art->MergeIndexes(lock, art.get())) {
58647
+ throw ConstraintException("Data contains duplicates on indexed column(s)");
58648
+ }
58616
58649
  }
58617
58650
 
58618
58651
  // NOTE: currently this code is only used for index creation, so we can assume that there are no
58619
- // duplicate violations between the existing index and the new data
58620
- ART::Merge(this, temp_art.get());
58652
+ // duplicate violations between the existing index and the new data,
58653
+ // so we do not need to revert any changes
58654
+ if (!this->MergeIndexes(lock, temp_art.get())) {
58655
+ throw ConstraintException("Data contains duplicates on indexed column(s)");
58656
+ }
58621
58657
  }
58622
58658
 
58623
58659
  bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
@@ -58625,39 +58661,35 @@ bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
58625
58661
  D_ASSERT(logical_types[0] == input.data[0].GetType());
58626
58662
 
58627
58663
  // generate the keys for the given input
58628
- vector<unique_ptr<Key>> keys;
58629
- GenerateKeys(input, keys);
58664
+ ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
58665
+ vector<Key> keys(input.size());
58666
+ GenerateKeys(arena_allocator, input, keys);
58630
58667
 
58631
58668
  // now insert the elements into the index
58632
58669
  row_ids.Flatten(input.size());
58633
58670
  auto row_identifiers = FlatVector::GetData<row_t>(row_ids);
58634
58671
  idx_t failed_index = DConstants::INVALID_INDEX;
58635
58672
  for (idx_t i = 0; i < input.size(); i++) {
58636
- if (!keys[i]) {
58673
+ if (keys[i].Empty()) {
58637
58674
  continue;
58638
58675
  }
58639
58676
 
58640
58677
  row_t row_id = row_identifiers[i];
58641
- if (!Insert(tree, move(keys[i]), 0, row_id)) {
58678
+ if (!Insert(tree, keys[i], 0, row_id)) {
58642
58679
  // failed to insert because of constraint violation
58643
58680
  failed_index = i;
58644
58681
  break;
58645
58682
  }
58646
58683
  }
58647
58684
  if (failed_index != DConstants::INVALID_INDEX) {
58648
- // failed to insert because of constraint violation: remove previously inserted entries
58649
- // generate keys again
58650
- keys.clear();
58651
- GenerateKeys(input, keys);
58652
- unique_ptr<Key> key;
58653
58685
 
58654
- // now erase the entries
58686
+ // failed to insert because of constraint violation: remove previously inserted entries
58655
58687
  for (idx_t i = 0; i < failed_index; i++) {
58656
- if (!keys[i]) {
58688
+ if (keys[i].Empty()) {
58657
58689
  continue;
58658
58690
  }
58659
58691
  row_t row_id = row_identifiers[i];
58660
- Erase(tree, *keys[i], 0, row_id);
58692
+ Erase(tree, keys[i], 0, row_id);
58661
58693
  }
58662
58694
  return false;
58663
58695
  }
@@ -58700,11 +58732,11 @@ bool ART::InsertToLeaf(Leaf &leaf, row_t row_id) {
58700
58732
  return true;
58701
58733
  }
58702
58734
 
58703
- bool ART::Insert(Node *&node, unique_ptr<Key> value, unsigned depth, row_t row_id) {
58704
- Key &key = *value;
58735
+ bool ART::Insert(Node *&node, Key &key, idx_t depth, row_t row_id) {
58736
+
58705
58737
  if (!node) {
58706
58738
  // node is currently empty, create a leaf here with the key
58707
- node = new Leaf(*value, depth, row_id);
58739
+ node = new Leaf(key, depth, row_id);
58708
58740
  return true;
58709
58741
  }
58710
58742
 
@@ -58731,7 +58763,7 @@ bool ART::Insert(Node *&node, unique_ptr<Key> value, unsigned depth, row_t row_i
58731
58763
  new_node->prefix = Prefix(key, depth, new_prefix_length);
58732
58764
  auto key_byte = node->prefix.Reduce(new_prefix_length);
58733
58765
  Node4::InsertChild(new_node, key_byte, node);
58734
- Node *leaf_node = new Leaf(*value, depth + new_prefix_length + 1, row_id);
58766
+ Node *leaf_node = new Leaf(key, depth + new_prefix_length + 1, row_id);
58735
58767
  Node4::InsertChild(new_node, key[depth + new_prefix_length], leaf_node);
58736
58768
  node = new_node;
58737
58769
  return true;
@@ -58748,7 +58780,7 @@ bool ART::Insert(Node *&node, unique_ptr<Key> value, unsigned depth, row_t row_i
58748
58780
  auto key_byte = node->prefix.Reduce(mismatch_pos);
58749
58781
  Node4::InsertChild(new_node, key_byte, node);
58750
58782
 
58751
- Node *leaf_node = new Leaf(*value, depth + mismatch_pos + 1, row_id);
58783
+ Node *leaf_node = new Leaf(key, depth + mismatch_pos + 1, row_id);
58752
58784
  Node4::InsertChild(new_node, key[depth + mismatch_pos], leaf_node);
58753
58785
  node = new_node;
58754
58786
  return true;
@@ -58761,11 +58793,11 @@ bool ART::Insert(Node *&node, unique_ptr<Key> value, unsigned depth, row_t row_i
58761
58793
  idx_t pos = node->GetChildPos(key[depth]);
58762
58794
  if (pos != DConstants::INVALID_INDEX) {
58763
58795
  auto child = node->GetChild(*this, pos);
58764
- bool insertion_result = Insert(child, move(value), depth + 1, row_id);
58796
+ bool insertion_result = Insert(child, key, depth + 1, row_id);
58765
58797
  node->ReplaceChildPointer(pos, child);
58766
58798
  return insertion_result;
58767
58799
  }
58768
- Node *new_node = new Leaf(*value, depth + 1, row_id);
58800
+ Node *new_node = new Leaf(key, depth + 1, row_id);
58769
58801
  Node::InsertChild(node, key[depth], new_node);
58770
58802
  return true;
58771
58803
  }
@@ -58781,20 +58813,21 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
58781
58813
  ExecuteExpressions(input, expression);
58782
58814
 
58783
58815
  // then generate the keys for the given input
58784
- vector<unique_ptr<Key>> keys;
58785
- GenerateKeys(expression, keys);
58816
+ ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
58817
+ vector<Key> keys(expression.size());
58818
+ GenerateKeys(arena_allocator, expression, keys);
58786
58819
 
58787
58820
  // now erase the elements from the database
58788
58821
  row_ids.Flatten(input.size());
58789
58822
  auto row_identifiers = FlatVector::GetData<row_t>(row_ids);
58790
58823
 
58791
58824
  for (idx_t i = 0; i < input.size(); i++) {
58792
- if (!keys[i]) {
58825
+ if (keys[i].Empty()) {
58793
58826
  continue;
58794
58827
  }
58795
- Erase(tree, *keys[i], 0, row_identifiers[i]);
58828
+ Erase(tree, keys[i], 0, row_identifiers[i]);
58796
58829
  #ifdef DEBUG
58797
- auto node = Lookup(tree, *keys[i], 0);
58830
+ auto node = Lookup(tree, keys[i], 0);
58798
58831
  if (node) {
58799
58832
  auto leaf = static_cast<Leaf *>(node);
58800
58833
  for (idx_t k = 0; k < leaf->count; k++) {
@@ -58805,7 +58838,7 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
58805
58838
  }
58806
58839
  }
58807
58840
 
58808
- void ART::Erase(Node *&node, Key &key, unsigned depth, row_t row_id) {
58841
+ void ART::Erase(Node *&node, Key &key, idx_t depth, row_t row_id) {
58809
58842
  if (!node) {
58810
58843
  return;
58811
58844
  }
@@ -58853,43 +58886,43 @@ void ART::Erase(Node *&node, Key &key, unsigned depth, row_t row_id) {
58853
58886
  //===--------------------------------------------------------------------===//
58854
58887
  // Point Query
58855
58888
  //===--------------------------------------------------------------------===//
58856
- static unique_ptr<Key> CreateKey(ART &art, PhysicalType type, Value &value) {
58889
+ static Key CreateKey(ArenaAllocator &allocator, PhysicalType type, Value &value) {
58857
58890
  D_ASSERT(type == value.type().InternalType());
58858
58891
  switch (type) {
58859
58892
  case PhysicalType::BOOL:
58860
- return Key::CreateKey<bool>(value);
58893
+ return Key::CreateKey<bool>(allocator, value);
58861
58894
  case PhysicalType::INT8:
58862
- return Key::CreateKey<int8_t>(value);
58895
+ return Key::CreateKey<int8_t>(allocator, value);
58863
58896
  case PhysicalType::INT16:
58864
- return Key::CreateKey<int16_t>(value);
58897
+ return Key::CreateKey<int16_t>(allocator, value);
58865
58898
  case PhysicalType::INT32:
58866
- return Key::CreateKey<int32_t>(value);
58899
+ return Key::CreateKey<int32_t>(allocator, value);
58867
58900
  case PhysicalType::INT64:
58868
- return Key::CreateKey<int64_t>(value);
58901
+ return Key::CreateKey<int64_t>(allocator, value);
58869
58902
  case PhysicalType::UINT8:
58870
- return Key::CreateKey<uint8_t>(value);
58903
+ return Key::CreateKey<uint8_t>(allocator, value);
58871
58904
  case PhysicalType::UINT16:
58872
- return Key::CreateKey<uint16_t>(value);
58905
+ return Key::CreateKey<uint16_t>(allocator, value);
58873
58906
  case PhysicalType::UINT32:
58874
- return Key::CreateKey<uint32_t>(value);
58907
+ return Key::CreateKey<uint32_t>(allocator, value);
58875
58908
  case PhysicalType::UINT64:
58876
- return Key::CreateKey<uint64_t>(value);
58909
+ return Key::CreateKey<uint64_t>(allocator, value);
58877
58910
  case PhysicalType::INT128:
58878
- return Key::CreateKey<hugeint_t>(value);
58911
+ return Key::CreateKey<hugeint_t>(allocator, value);
58879
58912
  case PhysicalType::FLOAT:
58880
- return Key::CreateKey<float>(value);
58913
+ return Key::CreateKey<float>(allocator, value);
58881
58914
  case PhysicalType::DOUBLE:
58882
- return Key::CreateKey<double>(value);
58915
+ return Key::CreateKey<double>(allocator, value);
58883
58916
  case PhysicalType::VARCHAR:
58884
- return Key::CreateKey<string_t>(value);
58917
+ return Key::CreateKey<string_t>(allocator, value);
58885
58918
  default:
58886
58919
  throw InternalException("Invalid type for index");
58887
58920
  }
58888
58921
  }
58889
58922
 
58890
- bool ART::SearchEqual(ARTIndexScanState *state, idx_t max_count, vector<row_t> &result_ids) {
58891
- auto key = CreateKey(*this, types[0], state->values[0]);
58892
- auto leaf = static_cast<Leaf *>(Lookup(tree, *key, 0));
58923
+ bool ART::SearchEqual(Key &key, idx_t max_count, vector<row_t> &result_ids) {
58924
+
58925
+ auto leaf = static_cast<Leaf *>(Lookup(tree, key, 0));
58893
58926
  if (!leaf) {
58894
58927
  return true;
58895
58928
  }
@@ -58903,17 +58936,17 @@ bool ART::SearchEqual(ARTIndexScanState *state, idx_t max_count, vector<row_t> &
58903
58936
  return true;
58904
58937
  }
58905
58938
 
58906
- void ART::SearchEqualJoinNoFetch(Value &equal_value, idx_t &result_size) {
58907
- //! We need to look for a leaf
58908
- auto key = CreateKey(*this, types[0], equal_value);
58909
- auto leaf = (Leaf *)(Lookup(tree, *key, 0));
58939
+ void ART::SearchEqualJoinNoFetch(Key &key, idx_t &result_size) {
58940
+
58941
+ // we need to look for a leaf
58942
+ auto leaf = Lookup(tree, key, 0);
58910
58943
  if (!leaf) {
58911
58944
  return;
58912
58945
  }
58913
58946
  result_size = leaf->count;
58914
58947
  }
58915
58948
 
58916
- Node *ART::Lookup(Node *node, Key &key, unsigned depth) {
58949
+ Leaf *ART::Lookup(Node *node, Key &key, idx_t depth) {
58917
58950
  while (node) {
58918
58951
  if (node->type == NodeType::NLeaf) {
58919
58952
  auto leaf = (Leaf *)node;
@@ -58924,7 +58957,7 @@ Node *ART::Lookup(Node *node, Key &key, unsigned depth) {
58924
58957
  return nullptr;
58925
58958
  }
58926
58959
  }
58927
- return node;
58960
+ return (Leaf *)node;
58928
58961
  }
58929
58962
  if (node->prefix.Size()) {
58930
58963
  for (idx_t pos = 0; pos < node->prefix.Size(); pos++) {
@@ -58950,112 +58983,127 @@ Node *ART::Lookup(Node *node, Key &key, unsigned depth) {
58950
58983
  // Returns: True (If found leaf >= key)
58951
58984
  // False (Otherwise)
58952
58985
  //===--------------------------------------------------------------------===//
58953
- bool ART::SearchGreater(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector<row_t> &result_ids) {
58986
+ bool ART::SearchGreater(ARTIndexScanState *state, Key &key, bool inclusive, idx_t max_count,
58987
+ vector<row_t> &result_ids) {
58988
+
58954
58989
  Iterator *it = &state->iterator;
58955
- auto key = CreateKey(*this, types[0], state->values[0]);
58956
58990
 
58957
58991
  // greater than scan: first set the iterator to the node at which we will start our scan by finding the lowest node
58958
58992
  // that satisfies our requirement
58959
58993
  if (!it->art) {
58960
58994
  it->art = this;
58961
- bool found = it->LowerBound(tree, *key, inclusive);
58995
+ bool found = it->LowerBound(tree, key, inclusive);
58962
58996
  if (!found) {
58963
58997
  return true;
58964
58998
  }
58965
58999
  }
58966
59000
  // after that we continue the scan; we don't need to check the bounds as any value following this value is
58967
59001
  // automatically bigger and hence satisfies our predicate
58968
- return it->Scan(nullptr, max_count, result_ids, false);
59002
+ Key empty_key = Key();
59003
+ return it->Scan(empty_key, max_count, result_ids, false);
58969
59004
  }
58970
59005
 
58971
59006
  //===--------------------------------------------------------------------===//
58972
59007
  // Less Than
58973
59008
  //===--------------------------------------------------------------------===//
58974
- bool ART::SearchLess(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector<row_t> &result_ids) {
59009
+ bool ART::SearchLess(ARTIndexScanState *state, Key &upper_bound, bool inclusive, idx_t max_count,
59010
+ vector<row_t> &result_ids) {
59011
+
58975
59012
  if (!tree) {
58976
59013
  return true;
58977
59014
  }
58978
59015
 
58979
59016
  Iterator *it = &state->iterator;
58980
- auto upper_bound = CreateKey(*this, types[0], state->values[0]);
58981
59017
 
58982
59018
  if (!it->art) {
58983
59019
  it->art = this;
58984
59020
  // first find the minimum value in the ART: we start scanning from this value
58985
59021
  it->FindMinimum(*tree);
58986
59022
  // early out min value higher than upper bound query
58987
- if (it->cur_key > *upper_bound) {
59023
+ if (it->cur_key > upper_bound) {
58988
59024
  return true;
58989
59025
  }
58990
59026
  }
58991
59027
  // now continue the scan until we reach the upper bound
58992
- return it->Scan(upper_bound.get(), max_count, result_ids, inclusive);
59028
+ return it->Scan(upper_bound, max_count, result_ids, inclusive);
58993
59029
  }
58994
59030
 
58995
59031
  //===--------------------------------------------------------------------===//
58996
59032
  // Closed Range Query
58997
59033
  //===--------------------------------------------------------------------===//
58998
- bool ART::SearchCloseRange(ARTIndexScanState *state, bool left_inclusive, bool right_inclusive, idx_t max_count,
58999
- vector<row_t> &result_ids) {
59000
- auto lower_bound = CreateKey(*this, types[0], state->values[0]);
59001
- auto upper_bound = CreateKey(*this, types[0], state->values[1]);
59034
+ bool ART::SearchCloseRange(ARTIndexScanState *state, Key &lower_bound, Key &upper_bound, bool left_inclusive,
59035
+ bool right_inclusive, idx_t max_count, vector<row_t> &result_ids) {
59036
+
59002
59037
  Iterator *it = &state->iterator;
59038
+
59003
59039
  // first find the first node that satisfies the left predicate
59004
59040
  if (!it->art) {
59005
59041
  it->art = this;
59006
- bool found = it->LowerBound(tree, *lower_bound, left_inclusive);
59042
+ bool found = it->LowerBound(tree, lower_bound, left_inclusive);
59007
59043
  if (!found) {
59008
59044
  return true;
59009
59045
  }
59010
59046
  }
59011
59047
  // now continue the scan until we reach the upper bound
59012
- return it->Scan(upper_bound.get(), max_count, result_ids, right_inclusive);
59048
+ return it->Scan(upper_bound, max_count, result_ids, right_inclusive);
59013
59049
  }
59014
59050
 
59015
59051
  bool ART::Scan(Transaction &transaction, DataTable &table, IndexScanState &table_state, idx_t max_count,
59016
59052
  vector<row_t> &result_ids) {
59053
+
59017
59054
  auto state = (ARTIndexScanState *)&table_state;
59055
+ vector<row_t> row_ids;
59056
+ bool success;
59018
59057
 
59058
+ // FIXME: the key directly owning the data for a single key might be more efficient
59019
59059
  D_ASSERT(state->values[0].type().InternalType() == types[0]);
59060
+ ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
59061
+ auto key = CreateKey(arena_allocator, types[0], state->values[0]);
59020
59062
 
59021
- vector<row_t> row_ids;
59022
- bool success;
59023
59063
  if (state->values[1].IsNull()) {
59024
- lock_guard<mutex> l(lock);
59064
+
59025
59065
  // single predicate
59066
+ lock_guard<mutex> l(lock);
59026
59067
  switch (state->expressions[0]) {
59027
59068
  case ExpressionType::COMPARE_EQUAL:
59028
- success = SearchEqual(state, max_count, row_ids);
59069
+ success = SearchEqual(key, max_count, row_ids);
59029
59070
  break;
59030
59071
  case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
59031
- success = SearchGreater(state, true, max_count, row_ids);
59072
+ success = SearchGreater(state, key, true, max_count, row_ids);
59032
59073
  break;
59033
59074
  case ExpressionType::COMPARE_GREATERTHAN:
59034
- success = SearchGreater(state, false, max_count, row_ids);
59075
+ success = SearchGreater(state, key, false, max_count, row_ids);
59035
59076
  break;
59036
59077
  case ExpressionType::COMPARE_LESSTHANOREQUALTO:
59037
- success = SearchLess(state, true, max_count, row_ids);
59078
+ success = SearchLess(state, key, true, max_count, row_ids);
59038
59079
  break;
59039
59080
  case ExpressionType::COMPARE_LESSTHAN:
59040
- success = SearchLess(state, false, max_count, row_ids);
59081
+ success = SearchLess(state, key, false, max_count, row_ids);
59041
59082
  break;
59042
59083
  default:
59043
59084
  throw InternalException("Operation not implemented");
59044
59085
  }
59086
+
59045
59087
  } else {
59046
- lock_guard<mutex> l(lock);
59088
+
59047
59089
  // two predicates
59090
+ lock_guard<mutex> l(lock);
59091
+
59048
59092
  D_ASSERT(state->values[1].type().InternalType() == types[0]);
59093
+ auto upper_bound = CreateKey(arena_allocator, types[0], state->values[1]);
59094
+
59049
59095
  bool left_inclusive = state->expressions[0] == ExpressionType ::COMPARE_GREATERTHANOREQUALTO;
59050
59096
  bool right_inclusive = state->expressions[1] == ExpressionType ::COMPARE_LESSTHANOREQUALTO;
59051
- success = SearchCloseRange(state, left_inclusive, right_inclusive, max_count, row_ids);
59097
+ success = SearchCloseRange(state, key, upper_bound, left_inclusive, right_inclusive, max_count, row_ids);
59052
59098
  }
59099
+
59053
59100
  if (!success) {
59054
59101
  return false;
59055
59102
  }
59056
59103
  if (row_ids.empty()) {
59057
59104
  return true;
59058
59105
  }
59106
+
59059
59107
  // sort the row ids
59060
59108
  sort(row_ids.begin(), row_ids.end());
59061
59109
  // duplicate eliminate the row ids and append them to the row ids of the state
@@ -59084,14 +59132,15 @@ void ART::VerifyExistence(DataChunk &chunk, VerifyExistenceType verify_type, str
59084
59132
  ExecuteExpressions(chunk, expression_chunk);
59085
59133
 
59086
59134
  // generate the keys for the given input
59087
- vector<unique_ptr<Key>> keys;
59088
- GenerateKeys(expression_chunk, keys);
59135
+ ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
59136
+ vector<Key> keys(expression_chunk.size());
59137
+ GenerateKeys(arena_allocator, expression_chunk, keys);
59089
59138
 
59090
59139
  for (idx_t i = 0; i < chunk.size(); i++) {
59091
- if (!keys[i]) {
59140
+ if (keys[i].Empty()) {
59092
59141
  continue;
59093
59142
  }
59094
- Node *node_ptr = Lookup(tree, *keys[i], 0);
59143
+ Node *node_ptr = Lookup(tree, keys[i], 0);
59095
59144
  bool throw_exception =
59096
59145
  verify_type == VerifyExistenceType::APPEND_FK ? node_ptr == nullptr : node_ptr != nullptr;
59097
59146
  if (!throw_exception) {
@@ -59148,15 +59197,24 @@ BlockPointer ART::Serialize(duckdb::MetaBlockWriter &writer) {
59148
59197
  //===--------------------------------------------------------------------===//
59149
59198
  // Merge ARTs
59150
59199
  //===--------------------------------------------------------------------===//
59151
- void ART::Merge(ART *l_art, ART *r_art) {
59200
+ bool ART::MergeIndexes(IndexLock &state, Index *other_index) {
59152
59201
 
59153
- if (!l_art->tree) {
59154
- l_art->tree = r_art->tree;
59155
- r_art->tree = nullptr;
59156
- return;
59202
+ auto other_art = (ART *)other_index;
59203
+
59204
+ if (!this->tree) {
59205
+ this->tree = other_art->tree;
59206
+ other_art->tree = nullptr;
59207
+ return true;
59157
59208
  }
59158
59209
 
59159
- Node::MergeARTs(l_art, r_art);
59210
+ return Node::MergeARTs(this, other_art);
59211
+ }
59212
+
59213
+ string ART::ToString() {
59214
+ if (tree) {
59215
+ return tree->ToString(*this);
59216
+ }
59217
+ return "[empty]";
59160
59218
  }
59161
59219
 
59162
59220
  } // namespace duckdb
@@ -59166,25 +59224,41 @@ void ART::Merge(ART *l_art, ART *r_art) {
59166
59224
 
59167
59225
  namespace duckdb {
59168
59226
 
59169
- Key::Key(unique_ptr<data_t[]> data, idx_t len) : len(len), data(move(data)) {
59227
+ Key::Key() : len(0) {
59170
59228
  }
59171
59229
 
59172
- Key::Key(idx_t len) : len(len) {
59173
- data = unique_ptr<data_t[]>(new data_t[len]);
59230
+ Key::Key(data_ptr_t data, idx_t len) : len(len), data(data) {
59231
+ }
59232
+
59233
+ Key::Key(ArenaAllocator &allocator, idx_t len) : len(len) {
59234
+ data = allocator.Allocate(len);
59174
59235
  }
59175
59236
 
59176
59237
  template <>
59177
- unique_ptr<Key> Key::CreateKey(string_t value) {
59238
+ Key Key::CreateKey(ArenaAllocator &allocator, string_t value) {
59178
59239
  idx_t len = value.GetSize() + 1;
59179
- auto data = unique_ptr<data_t[]>(new data_t[len]);
59180
- memcpy(data.get(), value.GetDataUnsafe(), len - 1);
59240
+ auto data = allocator.Allocate(len);
59241
+ memcpy(data, value.GetDataUnsafe(), len - 1);
59181
59242
  data[len - 1] = '\0';
59182
- return make_unique<Key>(move(data), len);
59243
+ return Key(data, len);
59183
59244
  }
59184
59245
 
59185
59246
  template <>
59186
- unique_ptr<Key> Key::CreateKey(const char *value) {
59187
- return Key::CreateKey(string_t(value, strlen(value)));
59247
+ Key Key::CreateKey(ArenaAllocator &allocator, const char *value) {
59248
+ return Key::CreateKey(allocator, string_t(value, strlen(value)));
59249
+ }
59250
+
59251
+ template <>
59252
+ void Key::CreateKey(ArenaAllocator &allocator, Key &key, string_t value) {
59253
+ key.len = value.GetSize() + 1;
59254
+ key.data = allocator.Allocate(key.len);
59255
+ memcpy(key.data, value.GetDataUnsafe(), key.len - 1);
59256
+ key.data[key.len - 1] = '\0';
59257
+ }
59258
+
59259
+ template <>
59260
+ void Key::CreateKey(ArenaAllocator &allocator, Key &key, const char *value) {
59261
+ Key::CreateKey(allocator, key, string_t(value, strlen(value)));
59188
59262
  }
59189
59263
 
59190
59264
  bool Key::operator>(const Key &k) const {
@@ -59235,6 +59309,19 @@ bool Key::operator==(const Key &k) const {
59235
59309
  bool Key::ByteMatches(Key &other, idx_t &depth) {
59236
59310
  return data[depth] == other[depth];
59237
59311
  }
59312
+
59313
+ bool Key::Empty() {
59314
+ return len == 0;
59315
+ }
59316
+
59317
+ void Key::ConcatenateKey(ArenaAllocator &allocator, Key &other_key) {
59318
+
59319
+ auto compound_data = allocator.Allocate(len + other_key.len);
59320
+ memcpy(compound_data, data, len);
59321
+ memcpy(compound_data + len, other_key.data, other_key.len);
59322
+ len += other_key.len;
59323
+ data = compound_data;
59324
+ }
59238
59325
  } // namespace duckdb
59239
59326
 
59240
59327
 
@@ -59358,16 +59445,16 @@ void Iterator::PushKey(Node *cur_node, uint16_t pos) {
59358
59445
  }
59359
59446
  }
59360
59447
 
59361
- bool Iterator::Scan(Key *bound, idx_t max_count, vector<row_t> &result_ids, bool is_inclusive) {
59448
+ bool Iterator::Scan(Key &bound, idx_t max_count, vector<row_t> &result_ids, bool is_inclusive) {
59362
59449
  bool has_next;
59363
59450
  do {
59364
- if (bound) {
59451
+ if (!bound.Empty()) {
59365
59452
  if (is_inclusive) {
59366
- if (cur_key > *bound) {
59453
+ if (cur_key > bound) {
59367
59454
  break;
59368
59455
  }
59369
59456
  } else {
59370
- if (cur_key >= *bound) {
59457
+ if (cur_key >= bound) {
59371
59458
  break;
59372
59459
  }
59373
59460
  }
@@ -59589,6 +59676,27 @@ void Leaf::Remove(row_t row_id) {
59589
59676
  }
59590
59677
  }
59591
59678
 
59679
+ string Leaf::ToString(Node *node) {
59680
+
59681
+ Leaf *leaf = (Leaf *)node;
59682
+ string str = "Leaf: [";
59683
+ for (idx_t i = 0; i < leaf->count; i++) {
59684
+ str += i == 0 ? to_string(leaf->row_ids[i]) : ", " + to_string(leaf->row_ids[i]);
59685
+ }
59686
+ return str + "]";
59687
+ }
59688
+
59689
+ void Leaf::Merge(Node *&l_node, Node *&r_node) {
59690
+
59691
+ Leaf *l_n = (Leaf *)l_node;
59692
+ Leaf *r_n = (Leaf *)r_node;
59693
+
59694
+ // append non-duplicate row_ids to l_n
59695
+ for (idx_t i = 0; i < r_n->count; i++) {
59696
+ l_n->Insert(r_n->GetRowId(i));
59697
+ }
59698
+ }
59699
+
59592
59700
  BlockPointer Leaf::Serialize(duckdb::MetaBlockWriter &writer) {
59593
59701
  auto ptr = writer.GetBlockPointer();
59594
59702
  // Write Node Type
@@ -59616,21 +59724,6 @@ Leaf *Leaf::Deserialize(MetaBlockReader &reader) {
59616
59724
  return new Leaf(move(elements), num_elements, prefix);
59617
59725
  }
59618
59726
 
59619
- void Leaf::Merge(bool &has_constraint, Node *&l_node, Node *&r_node) {
59620
-
59621
- Leaf *l_n = (Leaf *)l_node;
59622
- Leaf *r_n = (Leaf *)r_node;
59623
-
59624
- // append non-duplicate row_ids to l_n
59625
- for (idx_t i = 0; i < r_n->count; i++) {
59626
- l_n->Insert(r_n->GetRowId(i));
59627
- }
59628
-
59629
- if (has_constraint && l_n->count > 1) {
59630
- throw ConstraintException("Data contains duplicates on indexed column(s)");
59631
- }
59632
- }
59633
-
59634
59727
  } // namespace duckdb
59635
59728
 
59636
59729
 
@@ -59782,6 +59875,36 @@ void Node::New(NodeType &type, Node *&node) {
59782
59875
  }
59783
59876
  }
59784
59877
 
59878
+ string Node::ToString(ART &art) {
59879
+
59880
+ string str = "Node";
59881
+ switch (this->type) {
59882
+ case NodeType::NLeaf:
59883
+ return Leaf::ToString(this);
59884
+ case NodeType::N4:
59885
+ str += to_string(Node4::GetSize());
59886
+ break;
59887
+ case NodeType::N16:
59888
+ str += to_string(Node16::GetSize());
59889
+ break;
59890
+ case NodeType::N48:
59891
+ str += to_string(Node48::GetSize());
59892
+ break;
59893
+ case NodeType::N256:
59894
+ str += to_string(Node256::GetSize());
59895
+ break;
59896
+ }
59897
+
59898
+ str += ": [";
59899
+ auto next_pos = GetNextPos(DConstants::INVALID_INDEX);
59900
+ while (next_pos != DConstants::INVALID_INDEX) {
59901
+ auto child = GetChild(art, next_pos);
59902
+ str += "(" + to_string(next_pos) + ", " + child->ToString(art) + ")";
59903
+ next_pos = GetNextPos(next_pos);
59904
+ }
59905
+ return str + "]";
59906
+ }
59907
+
59785
59908
  BlockPointer Node::SerializeInternal(ART &art, duckdb::MetaBlockWriter &writer, InternalType &internal_type) {
59786
59909
  // Iterate through children and annotate their offsets
59787
59910
  vector<BlockPointer> child_offsets;
@@ -59879,7 +60002,7 @@ void UpdateParentsOfNodes(Node *&l_node, Node *&r_node, ParentsOfNodes &parents)
59879
60002
  }
59880
60003
  }
59881
60004
 
59882
- void Merge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
60005
+ bool Merge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
59883
60006
 
59884
60007
  // always try to merge the smaller node into the bigger node
59885
60008
  // because maybe there is enough free space in the bigger node to fit the smaller one
@@ -59904,13 +60027,16 @@ void Merge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
59904
60027
  case NodeType::NLeaf:
59905
60028
  D_ASSERT(info.l_node->type == NodeType::NLeaf);
59906
60029
  D_ASSERT(info.r_node->type == NodeType::NLeaf);
59907
- auto has_constraint = info.l_art->IsPrimary() || info.l_art->IsUnique();
59908
- return Leaf::Merge(has_constraint, info.l_node, info.r_node);
60030
+ if (info.l_art->IsUnique()) {
60031
+ return false;
60032
+ }
60033
+ Leaf::Merge(info.l_node, info.r_node);
60034
+ return true;
59909
60035
  }
59910
60036
  throw InternalException("Invalid node type for right node in merge.");
59911
60037
  }
59912
60038
 
59913
- void ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
60039
+ bool ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
59914
60040
 
59915
60041
  auto &l_node = info.l_node;
59916
60042
  auto &r_node = info.r_node;
@@ -59951,7 +60077,7 @@ void ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &paren
59951
60077
  Node::InsertChild(l_node, mismatch_byte, r_node);
59952
60078
  UpdateParentsOfNodes(l_node, null_parent, parents);
59953
60079
  r_node = nullptr;
59954
- return;
60080
+ return true;
59955
60081
  }
59956
60082
 
59957
60083
  // recurse
@@ -59978,9 +60104,10 @@ void ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &paren
59978
60104
  l_node = new_node;
59979
60105
  UpdateParentsOfNodes(l_node, null_parent, parents);
59980
60106
  r_node = nullptr;
60107
+ return true;
59981
60108
  }
59982
60109
 
59983
- void Node::MergeAtByte(MergeInfo &info, idx_t depth, idx_t &l_child_pos, idx_t &r_pos, uint8_t &key_byte,
60110
+ bool Node::MergeAtByte(MergeInfo &info, idx_t depth, idx_t &l_child_pos, idx_t &r_pos, uint8_t &key_byte,
59984
60111
  Node *&l_parent, idx_t l_pos) {
59985
60112
 
59986
60113
  auto r_child = info.r_node->GetChild(*info.r_art, r_pos);
@@ -59992,22 +60119,22 @@ void Node::MergeAtByte(MergeInfo &info, idx_t depth, idx_t &l_child_pos, idx_t &
59992
60119
  l_parent->ReplaceChildPointer(l_pos, info.l_node);
59993
60120
  }
59994
60121
  info.r_node->ReplaceChildPointer(r_pos, nullptr);
59995
- return;
60122
+ return true;
59996
60123
  }
59997
60124
 
59998
60125
  // recurse
59999
60126
  auto l_child = info.l_node->GetChild(*info.l_art, l_child_pos);
60000
60127
  MergeInfo child_info(info.l_art, info.r_art, l_child, r_child);
60001
60128
  ParentsOfNodes child_parents(info.l_node, l_child_pos, info.r_node, r_pos);
60002
- ResolvePrefixesAndMerge(child_info, depth + 1, child_parents);
60129
+ return ResolvePrefixesAndMerge(child_info, depth + 1, child_parents);
60003
60130
  }
60004
60131
 
60005
- void Node::MergeARTs(ART *l_art, ART *r_art) {
60132
+ bool Node::MergeARTs(ART *l_art, ART *r_art) {
60006
60133
 
60007
60134
  Node *null_parent = nullptr;
60008
60135
  MergeInfo info(l_art, r_art, l_art->tree, r_art->tree);
60009
60136
  ParentsOfNodes parents(null_parent, 0, null_parent, 0);
60010
- ResolvePrefixesAndMerge(info, 0, parents);
60137
+ return ResolvePrefixesAndMerge(info, 0, parents);
60011
60138
  }
60012
60139
 
60013
60140
  } // namespace duckdb
@@ -60137,15 +60264,18 @@ void Node16::EraseChild(Node *&node, int pos, ART &art) {
60137
60264
  }
60138
60265
  }
60139
60266
 
60140
- void Node16::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
60267
+ bool Node16::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
60141
60268
 
60142
60269
  Node16 *r_n = (Node16 *)info.r_node;
60143
60270
 
60144
60271
  for (idx_t i = 0; i < info.r_node->count; i++) {
60145
60272
 
60146
60273
  auto l_child_pos = info.l_node->GetChildPos(r_n->key[i]);
60147
- Node::MergeAtByte(info, depth, l_child_pos, i, r_n->key[i], l_parent, l_pos);
60274
+ if (!Node::MergeAtByte(info, depth, l_child_pos, i, r_n->key[i], l_parent, l_pos)) {
60275
+ return false;
60276
+ }
60148
60277
  }
60278
+ return true;
60149
60279
  }
60150
60280
 
60151
60281
  idx_t Node16::GetSize() {
@@ -60237,16 +60367,19 @@ void Node256::EraseChild(Node *&node, int pos, ART &art) {
60237
60367
  }
60238
60368
  }
60239
60369
 
60240
- void Node256::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
60370
+ bool Node256::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
60241
60371
 
60242
60372
  for (idx_t i = 0; i < 256; i++) {
60243
60373
  if (info.r_node->GetChildPos(i) != DConstants::INVALID_INDEX) {
60244
60374
 
60245
60375
  auto l_child_pos = info.l_node->GetChildPos(i);
60246
60376
  auto key_byte = (uint8_t)i;
60247
- Node::MergeAtByte(info, depth, l_child_pos, i, key_byte, l_parent, l_pos);
60377
+ if (!Node::MergeAtByte(info, depth, l_child_pos, i, key_byte, l_parent, l_pos)) {
60378
+ return false;
60379
+ }
60248
60380
  }
60249
60381
  }
60382
+ return true;
60250
60383
  }
60251
60384
 
60252
60385
  idx_t Node256::GetSize() {
@@ -60373,15 +60506,18 @@ void Node4::EraseChild(Node *&node, int pos, ART &art) {
60373
60506
  }
60374
60507
  }
60375
60508
 
60376
- void Node4::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
60509
+ bool Node4::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
60377
60510
 
60378
60511
  Node4 *r_n = (Node4 *)info.r_node;
60379
60512
 
60380
60513
  for (idx_t i = 0; i < info.r_node->count; i++) {
60381
60514
 
60382
60515
  auto l_child_pos = info.l_node->GetChildPos(r_n->key[i]);
60383
- Node::MergeAtByte(info, depth, l_child_pos, i, r_n->key[i], l_parent, l_pos);
60516
+ if (!Node::MergeAtByte(info, depth, l_child_pos, i, r_n->key[i], l_parent, l_pos)) {
60517
+ return false;
60518
+ }
60384
60519
  }
60520
+ return true;
60385
60521
  }
60386
60522
 
60387
60523
  idx_t Node4::GetSize() {
@@ -60505,7 +60641,7 @@ void Node48::EraseChild(Node *&node, int pos, ART &art) {
60505
60641
  }
60506
60642
  }
60507
60643
 
60508
- void Node48::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
60644
+ bool Node48::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
60509
60645
 
60510
60646
  Node48 *r_n = (Node48 *)info.r_node;
60511
60647
 
@@ -60514,9 +60650,12 @@ void Node48::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
60514
60650
 
60515
60651
  auto l_child_pos = info.l_node->GetChildPos(i);
60516
60652
  auto key_byte = (uint8_t)i;
60517
- Node::MergeAtByte(info, depth, l_child_pos, i, key_byte, l_parent, l_pos);
60653
+ if (!Node::MergeAtByte(info, depth, l_child_pos, i, key_byte, l_parent, l_pos)) {
60654
+ return false;
60655
+ }
60518
60656
  }
60519
60657
  }
60658
+ return true;
60520
60659
  }
60521
60660
 
60522
60661
  idx_t Node48::GetSize() {
@@ -72728,7 +72867,8 @@ namespace duckdb {
72728
72867
 
72729
72868
  class IndexJoinOperatorState : public OperatorState {
72730
72869
  public:
72731
- IndexJoinOperatorState(Allocator &allocator, const PhysicalIndexJoin &op) : probe_executor(allocator) {
72870
+ IndexJoinOperatorState(Allocator &allocator, const PhysicalIndexJoin &op)
72871
+ : probe_executor(allocator), arena_allocator(allocator), keys(STANDARD_VECTOR_SIZE) {
72732
72872
  rhs_rows.resize(STANDARD_VECTOR_SIZE);
72733
72873
  result_sizes.resize(STANDARD_VECTOR_SIZE);
72734
72874
 
@@ -72750,10 +72890,14 @@ public:
72750
72890
  DataChunk join_keys;
72751
72891
  DataChunk rhs_chunk;
72752
72892
  SelectionVector rhs_sel;
72893
+
72753
72894
  //! Vector of rows that mush be fetched for every LHS key
72754
72895
  vector<vector<row_t>> rhs_rows;
72755
72896
  ExpressionExecutor probe_executor;
72756
72897
 
72898
+ ArenaAllocator arena_allocator;
72899
+ vector<Key> keys;
72900
+
72757
72901
  public:
72758
72902
  void Finalize(PhysicalOperator *op, ExecutionContext &context) override {
72759
72903
  context.thread.profiler.Flush(op, &probe_executor, "probe_executor", 0);
@@ -72858,22 +73002,25 @@ void PhysicalIndexJoin::Output(ExecutionContext &context, DataChunk &input, Data
72858
73002
  }
72859
73003
 
72860
73004
  void PhysicalIndexJoin::GetRHSMatches(ExecutionContext &context, DataChunk &input, OperatorState &state_p) const {
73005
+
72861
73006
  auto &state = (IndexJoinOperatorState &)state_p;
72862
73007
  auto &art = (ART &)*index;
72863
- auto &transaction = Transaction::GetTransaction(context.client);
73008
+
73009
+ // generate the keys for this chunk
73010
+ state.arena_allocator.Reset();
73011
+ ART::GenerateKeys(state.arena_allocator, state.join_keys, state.keys);
73012
+
72864
73013
  for (idx_t i = 0; i < input.size(); i++) {
72865
- auto equal_value = state.join_keys.GetValue(0, i);
72866
- auto index_state = art.InitializeScanSinglePredicate(transaction, equal_value, ExpressionType::COMPARE_EQUAL);
72867
73014
  state.rhs_rows[i].clear();
72868
- if (!equal_value.IsNull()) {
73015
+ if (!state.keys[i].Empty()) {
72869
73016
  if (fetch_types.empty()) {
72870
73017
  IndexLock lock;
72871
73018
  index->InitializeLock(lock);
72872
- art.SearchEqualJoinNoFetch(equal_value, state.result_sizes[i]);
73019
+ art.SearchEqualJoinNoFetch(state.keys[i], state.result_sizes[i]);
72873
73020
  } else {
72874
73021
  IndexLock lock;
72875
73022
  index->InitializeLock(lock);
72876
- art.SearchEqual((ARTIndexScanState *)index_state.get(), (idx_t)-1, state.rhs_rows[i]);
73023
+ art.SearchEqual(state.keys[i], (idx_t)-1, state.rhs_rows[i]);
72877
73024
  state.result_sizes[i] = state.rhs_rows[i].size();
72878
73025
  }
72879
73026
  } else {
@@ -80005,11 +80152,7 @@ void PhysicalCreateIndex::Combine(ExecutionContext &context, GlobalSinkState &gs
80005
80152
  }
80006
80153
 
80007
80154
  // merge the local index into the global index
80008
- {
80009
- IndexLock global_lock;
80010
- gstate.global_index->InitializeLock(global_lock);
80011
- gstate.global_index->MergeIndexes(lstate.local_index.get());
80012
- }
80155
+ gstate.global_index->MergeIndexes(lstate.local_index.get());
80013
80156
  }
80014
80157
 
80015
80158
  SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
@@ -88375,6 +88518,9 @@ template <bool LAST, bool SKIP_NULLS>
88375
88518
  struct FirstFunctionString : public FirstFunctionBase {
88376
88519
  template <class STATE>
88377
88520
  static void SetValue(STATE *state, string_t value, bool is_null) {
88521
+ if (LAST && state->is_set) {
88522
+ Destroy(state);
88523
+ }
88378
88524
  if (is_null) {
88379
88525
  if (!SKIP_NULLS) {
88380
88526
  state->is_set = true;
@@ -193092,6 +193238,24 @@ data_ptr_t ArenaAllocator::Allocate(idx_t len) {
193092
193238
  return result;
193093
193239
  }
193094
193240
 
193241
+ void ArenaAllocator::Reset() {
193242
+
193243
+ if (head) {
193244
+ // destroy all chunks except the current one
193245
+ if (head->next) {
193246
+ auto current_next = move(head->next);
193247
+ while (current_next) {
193248
+ current_next = move(current_next->next);
193249
+ }
193250
+ }
193251
+ tail = head.get();
193252
+
193253
+ // reset the head
193254
+ head->current_position = 0;
193255
+ head->prev = nullptr;
193256
+ }
193257
+ }
193258
+
193095
193259
  void ArenaAllocator::Destroy() {
193096
193260
  head = nullptr;
193097
193261
  tail = nullptr;
@@ -201767,15 +201931,15 @@ void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
201767
201931
  Delete(state, entries, row_identifiers);
201768
201932
  }
201769
201933
 
201770
- void Index::MergeIndexes(Index *other_index) {
201934
+ bool Index::MergeIndexes(Index *other_index) {
201935
+
201936
+ IndexLock state;
201937
+ InitializeLock(state);
201771
201938
 
201772
- // create the global index
201773
201939
  switch (this->type) {
201774
201940
  case IndexType::ART: {
201775
- auto this_art = (ART *)this;
201776
- auto other_art = (ART *)other_index;
201777
- ART::Merge(this_art, other_art);
201778
- break;
201941
+ auto art = (ART *)this;
201942
+ return art->MergeIndexes(state, other_index);
201779
201943
  }
201780
201944
  default:
201781
201945
  throw InternalException("Unimplemented index type for merge");