duckdb 0.5.2-dev708.0 → 0.5.2-dev737.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +530 -366
- package/src/duckdb.hpp +32 -9
- package/src/parquet-amalgamation.cpp +37449 -37449
package/src/duckdb.cpp
CHANGED
|
@@ -1388,11 +1388,6 @@ CopyFunctionCatalogEntry::CopyFunctionCatalogEntry(Catalog *catalog, SchemaCatal
|
|
|
1388
1388
|
|
|
1389
1389
|
|
|
1390
1390
|
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
1391
|
//===----------------------------------------------------------------------===//
|
|
1397
1392
|
// DuckDB
|
|
1398
1393
|
//
|
|
@@ -1609,34 +1604,47 @@ inline void Radix::EncodeData(data_ptr_t dataptr, interval_t value) {
|
|
|
1609
1604
|
|
|
1610
1605
|
|
|
1611
1606
|
|
|
1607
|
+
|
|
1612
1608
|
namespace duckdb {
|
|
1613
1609
|
|
|
1614
1610
|
class Key {
|
|
1615
1611
|
public:
|
|
1616
|
-
Key(
|
|
1617
|
-
|
|
1618
|
-
|
|
1612
|
+
Key();
|
|
1613
|
+
Key(data_ptr_t data, idx_t len);
|
|
1614
|
+
Key(ArenaAllocator &allocator, idx_t len);
|
|
1619
1615
|
|
|
1620
1616
|
idx_t len;
|
|
1621
|
-
|
|
1617
|
+
data_ptr_t data;
|
|
1622
1618
|
|
|
1623
1619
|
public:
|
|
1624
1620
|
template <class T>
|
|
1625
|
-
static inline
|
|
1626
|
-
auto data = Key::CreateData<T>(element);
|
|
1627
|
-
return
|
|
1621
|
+
static inline Key CreateKey(ArenaAllocator &allocator, T element) {
|
|
1622
|
+
auto data = Key::CreateData<T>(allocator, element);
|
|
1623
|
+
return Key(data, sizeof(element));
|
|
1624
|
+
}
|
|
1625
|
+
|
|
1626
|
+
template <class T>
|
|
1627
|
+
static inline Key CreateKey(ArenaAllocator &allocator, const Value &element) {
|
|
1628
|
+
return CreateKey(allocator, element.GetValueUnsafe<T>());
|
|
1628
1629
|
}
|
|
1629
1630
|
|
|
1630
1631
|
template <class T>
|
|
1631
|
-
static inline
|
|
1632
|
-
|
|
1632
|
+
static inline void CreateKey(ArenaAllocator &allocator, Key &key, T element) {
|
|
1633
|
+
key.data = Key::CreateData<T>(allocator, element);
|
|
1634
|
+
key.len = sizeof(element);
|
|
1635
|
+
}
|
|
1636
|
+
|
|
1637
|
+
template <class T>
|
|
1638
|
+
static inline void CreateKey(ArenaAllocator &allocator, Key &key, const Value element) {
|
|
1639
|
+
key.data = Key::CreateData<T>(allocator, element.GetValueUnsafe<T>());
|
|
1640
|
+
key.len = sizeof(element);
|
|
1633
1641
|
}
|
|
1634
1642
|
|
|
1635
1643
|
public:
|
|
1636
|
-
data_t &operator[](
|
|
1644
|
+
//! Mutable access to the byte at offset i of the key data (no bounds check is performed here)
data_t &operator[](size_t i) {
	return *(data + i);
}
//! Read-only access to the byte at offset i of the key data (no bounds check is performed here)
const data_t &operator[](size_t i) const {
	return *(data + i);
}
|
|
1642
1650
|
bool operator>(const Key &k) const;
|
|
@@ -1645,23 +1653,39 @@ public:
|
|
|
1645
1653
|
bool operator==(const Key &k) const;
|
|
1646
1654
|
|
|
1647
1655
|
bool ByteMatches(Key &other, idx_t &depth);
|
|
1656
|
+
bool Empty();
|
|
1657
|
+
void ConcatenateKey(ArenaAllocator &allocator, Key &concat_key);
|
|
1648
1658
|
|
|
1649
1659
|
private:
|
|
1650
1660
|
template <class T>
|
|
1651
|
-
static inline
|
|
1652
|
-
auto data =
|
|
1653
|
-
Radix::EncodeData<T>(data
|
|
1661
|
+
static inline data_ptr_t CreateData(ArenaAllocator &allocator, T value) {
|
|
1662
|
+
auto data = allocator.Allocate(sizeof(value));
|
|
1663
|
+
Radix::EncodeData<T>(data, value);
|
|
1654
1664
|
return data;
|
|
1655
1665
|
}
|
|
1656
1666
|
};
|
|
1657
1667
|
|
|
1658
1668
|
template <>
|
|
1659
|
-
|
|
1669
|
+
Key Key::CreateKey(ArenaAllocator &allocator, string_t value);
|
|
1670
|
+
template <>
|
|
1671
|
+
Key Key::CreateKey(ArenaAllocator &allocator, const char *value);
|
|
1660
1672
|
template <>
|
|
1661
|
-
|
|
1673
|
+
void Key::CreateKey(ArenaAllocator &allocator, Key &key, string_t value);
|
|
1674
|
+
template <>
|
|
1675
|
+
void Key::CreateKey(ArenaAllocator &allocator, Key &key, const char *value);
|
|
1662
1676
|
|
|
1663
1677
|
} // namespace duckdb
|
|
1664
1678
|
|
|
1679
|
+
//===----------------------------------------------------------------------===//
|
|
1680
|
+
// DuckDB
|
|
1681
|
+
//
|
|
1682
|
+
// duckdb/execution/index/art/iterator.hpp
|
|
1683
|
+
//
|
|
1684
|
+
//
|
|
1685
|
+
//===----------------------------------------------------------------------===//
|
|
1686
|
+
|
|
1687
|
+
|
|
1688
|
+
|
|
1665
1689
|
//===----------------------------------------------------------------------===//
|
|
1666
1690
|
// DuckDB
|
|
1667
1691
|
//
|
|
@@ -1683,6 +1707,7 @@ unique_ptr<Key> Key::CreateKey(const char *value);
|
|
|
1683
1707
|
|
|
1684
1708
|
|
|
1685
1709
|
|
|
1710
|
+
|
|
1686
1711
|
//===----------------------------------------------------------------------===//
|
|
1687
1712
|
// DuckDB
|
|
1688
1713
|
//
|
|
@@ -1693,7 +1718,6 @@ unique_ptr<Key> Key::CreateKey(const char *value);
|
|
|
1693
1718
|
|
|
1694
1719
|
|
|
1695
1720
|
|
|
1696
|
-
|
|
1697
1721
|
//===----------------------------------------------------------------------===//
|
|
1698
1722
|
// DuckDB
|
|
1699
1723
|
//
|
|
@@ -1738,6 +1762,7 @@ private:
|
|
|
1738
1762
|
} // namespace duckdb
|
|
1739
1763
|
|
|
1740
1764
|
|
|
1765
|
+
|
|
1741
1766
|
namespace duckdb {
|
|
1742
1767
|
class Prefix {
|
|
1743
1768
|
public:
|
|
@@ -1786,7 +1811,6 @@ private:
|
|
|
1786
1811
|
|
|
1787
1812
|
|
|
1788
1813
|
|
|
1789
|
-
|
|
1790
1814
|
namespace duckdb {
|
|
1791
1815
|
enum class NodeType : uint8_t { NLeaf = 0, N4 = 1, N16 = 2, N48 = 3, N256 = 4 };
|
|
1792
1816
|
class ART;
|
|
@@ -1870,16 +1894,18 @@ public:
|
|
|
1870
1894
|
//! Create a new node of the specified type
|
|
1871
1895
|
static void New(NodeType &type, Node *&node);
|
|
1872
1896
|
|
|
1897
|
+
//! Returns the string representation of a node
|
|
1898
|
+
string ToString(ART &art);
|
|
1873
1899
|
//! Serialize this node
|
|
1874
1900
|
BlockPointer Serialize(ART &art, duckdb::MetaBlockWriter &writer);
|
|
1901
|
+
|
|
1875
1902
|
//! Deserialize this node
|
|
1876
1903
|
static Node *Deserialize(ART &art, idx_t block_id, idx_t offset);
|
|
1877
|
-
|
|
1878
1904
|
//! Merge r_node into l_node at the specified byte
|
|
1879
|
-
static
|
|
1905
|
+
static bool MergeAtByte(MergeInfo &info, idx_t depth, idx_t &l_child_pos, idx_t &r_pos, uint8_t &key_byte,
|
|
1880
1906
|
Node *&l_parent, idx_t l_pos);
|
|
1881
1907
|
//! Merge two ART
|
|
1882
|
-
static
|
|
1908
|
+
static bool MergeARTs(ART *l_art, ART *r_art);
|
|
1883
1909
|
|
|
1884
1910
|
private:
|
|
1885
1911
|
//! Serialize internal nodes
|
|
@@ -1912,8 +1938,10 @@ public:
|
|
|
1912
1938
|
//! Remove a row_id from a leaf
|
|
1913
1939
|
void Remove(row_t row_id);
|
|
1914
1940
|
|
|
1941
|
+
//! Returns the string representation of a leaf
|
|
1942
|
+
static string ToString(Node *node);
|
|
1915
1943
|
//! Merge two NLeaf nodes
|
|
1916
|
-
static void Merge(
|
|
1944
|
+
static void Merge(Node *&l_node, Node *&r_node);
|
|
1917
1945
|
|
|
1918
1946
|
//! Serialize a leaf
|
|
1919
1947
|
BlockPointer Serialize(duckdb::MetaBlockWriter &writer);
|
|
@@ -1927,10 +1955,73 @@ private:
|
|
|
1927
1955
|
} // namespace duckdb
|
|
1928
1956
|
|
|
1929
1957
|
|
|
1958
|
+
|
|
1959
|
+
namespace duckdb {
|
|
1960
|
+
|
|
1961
|
+
struct IteratorEntry {
|
|
1962
|
+
IteratorEntry() {
|
|
1963
|
+
}
|
|
1964
|
+
IteratorEntry(Node *node, idx_t pos) : node(node), pos(pos) {
|
|
1965
|
+
}
|
|
1966
|
+
|
|
1967
|
+
Node *node = nullptr;
|
|
1968
|
+
idx_t pos = 0;
|
|
1969
|
+
};
|
|
1970
|
+
|
|
1971
|
+
//! Keeps track of the current key in the iterator
|
|
1972
|
+
class IteratorCurrentKey {
|
|
1973
|
+
public:
|
|
1974
|
+
//! Push Byte
|
|
1975
|
+
void Push(uint8_t key);
|
|
1976
|
+
//! Pops n elements
|
|
1977
|
+
void Pop(idx_t n);
|
|
1978
|
+
//! Subscript operator
|
|
1979
|
+
uint8_t &operator[](idx_t idx);
|
|
1980
|
+
bool operator>(const Key &k) const;
|
|
1981
|
+
bool operator>=(const Key &k) const;
|
|
1982
|
+
bool operator==(const Key &k) const;
|
|
1983
|
+
|
|
1984
|
+
private:
|
|
1985
|
+
//! The current key position
|
|
1986
|
+
idx_t cur_key_pos = 0;
|
|
1987
|
+
//! The current key of the Leaf Node
|
|
1988
|
+
vector<uint8_t> key;
|
|
1989
|
+
};
|
|
1990
|
+
|
|
1991
|
+
class Iterator {
|
|
1992
|
+
public:
|
|
1993
|
+
//! Current Key
|
|
1994
|
+
IteratorCurrentKey cur_key;
|
|
1995
|
+
//! Pointer to the ART tree we are iterating
|
|
1996
|
+
ART *art = nullptr;
|
|
1997
|
+
|
|
1998
|
+
//! Scan the tree
|
|
1999
|
+
bool Scan(Key &bound, idx_t max_count, vector<row_t> &result_ids, bool is_inclusive);
|
|
2000
|
+
//! Finds minimum value of the tree
|
|
2001
|
+
void FindMinimum(Node &node);
|
|
2002
|
+
//! Goes to lower bound
|
|
2003
|
+
bool LowerBound(Node *node, Key &key, bool inclusive);
|
|
2004
|
+
|
|
2005
|
+
private:
|
|
2006
|
+
//! Stack of iterator entries
|
|
2007
|
+
stack<IteratorEntry> nodes;
|
|
2008
|
+
//! Last visited leaf
|
|
2009
|
+
Leaf *last_leaf = nullptr;
|
|
2010
|
+
//! Go to the next node
|
|
2011
|
+
bool Next();
|
|
2012
|
+
//! Push part of the key to cur_key
|
|
2013
|
+
void PushKey(Node *node, uint16_t pos);
|
|
2014
|
+
//! Pop node
|
|
2015
|
+
void PopNode();
|
|
2016
|
+
};
|
|
2017
|
+
} // namespace duckdb
|
|
2018
|
+
|
|
2019
|
+
|
|
2020
|
+
|
|
1930
2021
|
//===----------------------------------------------------------------------===//
|
|
1931
2022
|
// DuckDB
|
|
1932
2023
|
//
|
|
1933
|
-
// duckdb/execution/index/art/
|
|
2024
|
+
// duckdb/execution/index/art/node16.hpp
|
|
1934
2025
|
//
|
|
1935
2026
|
//
|
|
1936
2027
|
//===----------------------------------------------------------------------===//
|
|
@@ -1977,14 +2068,14 @@ public:
|
|
|
1977
2068
|
|
|
1978
2069
|
namespace duckdb {
|
|
1979
2070
|
|
|
1980
|
-
class
|
|
2071
|
+
class Node16 : public Node {
|
|
1981
2072
|
public:
|
|
1982
|
-
|
|
1983
|
-
uint8_t key[
|
|
1984
|
-
SwizzleablePointer children[
|
|
2073
|
+
explicit Node16();
|
|
2074
|
+
uint8_t key[16];
|
|
2075
|
+
SwizzleablePointer children[16];
|
|
1985
2076
|
|
|
1986
2077
|
public:
|
|
1987
|
-
//! Get position of a byte, returns DConstants::INVALID_INDEX if not exists
|
|
2078
|
+
//! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
|
|
1988
2079
|
idx_t GetChildPos(uint8_t k) override;
|
|
1989
2080
|
//! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
|
|
1990
2081
|
//! if there are no children matching the criteria
|
|
@@ -1993,18 +2084,18 @@ public:
|
|
|
1993
2084
|
idx_t GetMin() override;
|
|
1994
2085
|
//! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position
|
|
1995
2086
|
idx_t GetNextPos(idx_t pos) override;
|
|
1996
|
-
//! Get
|
|
2087
|
+
//! Get Node16 child
|
|
1997
2088
|
Node *GetChild(ART &art, idx_t pos) override;
|
|
1998
2089
|
//! Replace child pointer
|
|
1999
2090
|
void ReplaceChildPointer(idx_t pos, Node *node) override;
|
|
2000
2091
|
|
|
2001
|
-
//! Insert a new child node at key_byte into the
|
|
2092
|
+
//! Insert a new child node at key_byte into the Node16
|
|
2002
2093
|
static void InsertChild(Node *&node, uint8_t key_byte, Node *new_child);
|
|
2003
|
-
//! Erase the child at pos and (if necessary)
|
|
2094
|
+
//! Erase the child at pos and (if necessary) shrink to Node4
|
|
2004
2095
|
static void EraseChild(Node *&node, int pos, ART &art);
|
|
2005
|
-
//! Merge
|
|
2006
|
-
static
|
|
2007
|
-
//! Returns the size (maximum capacity) of the
|
|
2096
|
+
//! Merge Node16 into l_node
|
|
2097
|
+
static bool Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
|
|
2098
|
+
//! Returns the size (maximum capacity) of the Node16
|
|
2008
2099
|
static idx_t GetSize();
|
|
2009
2100
|
};
|
|
2010
2101
|
} // namespace duckdb
|
|
@@ -2012,7 +2103,7 @@ public:
|
|
|
2012
2103
|
//===----------------------------------------------------------------------===//
|
|
2013
2104
|
// DuckDB
|
|
2014
2105
|
//
|
|
2015
|
-
// duckdb/execution/index/art/
|
|
2106
|
+
// duckdb/execution/index/art/node256.hpp
|
|
2016
2107
|
//
|
|
2017
2108
|
//
|
|
2018
2109
|
//===----------------------------------------------------------------------===//
|
|
@@ -2023,11 +2114,10 @@ public:
|
|
|
2023
2114
|
|
|
2024
2115
|
namespace duckdb {
|
|
2025
2116
|
|
|
2026
|
-
class
|
|
2117
|
+
class Node256 : public Node {
|
|
2027
2118
|
public:
|
|
2028
|
-
explicit
|
|
2029
|
-
|
|
2030
|
-
SwizzleablePointer children[16];
|
|
2119
|
+
explicit Node256();
|
|
2120
|
+
SwizzleablePointer children[256];
|
|
2031
2121
|
|
|
2032
2122
|
public:
|
|
2033
2123
|
//! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
|
|
@@ -2039,18 +2129,18 @@ public:
|
|
|
2039
2129
|
idx_t GetMin() override;
|
|
2040
2130
|
//! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position
|
|
2041
2131
|
idx_t GetNextPos(idx_t pos) override;
|
|
2042
|
-
//! Get
|
|
2132
|
+
//! Get Node256 child
|
|
2043
2133
|
Node *GetChild(ART &art, idx_t pos) override;
|
|
2044
2134
|
//! Replace child pointer
|
|
2045
2135
|
void ReplaceChildPointer(idx_t pos, Node *node) override;
|
|
2046
2136
|
|
|
2047
|
-
//! Insert a new child node at key_byte into the
|
|
2137
|
+
//! Insert a new child node at key_byte into the Node256
|
|
2048
2138
|
static void InsertChild(Node *&node, uint8_t key_byte, Node *new_child);
|
|
2049
|
-
//! Erase the child at pos and (if necessary) shrink to
|
|
2139
|
+
//! Erase the child at pos and (if necessary) shrink to Node48
|
|
2050
2140
|
static void EraseChild(Node *&node, int pos, ART &art);
|
|
2051
|
-
//! Merge
|
|
2052
|
-
static
|
|
2053
|
-
//! Returns the size (maximum capacity) of the
|
|
2141
|
+
//! Merge Node256 into l_node
|
|
2142
|
+
static bool Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
|
|
2143
|
+
//! Returns the size (maximum capacity) of the Node256
|
|
2054
2144
|
static idx_t GetSize();
|
|
2055
2145
|
};
|
|
2056
2146
|
} // namespace duckdb
|
|
@@ -2058,7 +2148,7 @@ public:
|
|
|
2058
2148
|
//===----------------------------------------------------------------------===//
|
|
2059
2149
|
// DuckDB
|
|
2060
2150
|
//
|
|
2061
|
-
// duckdb/execution/index/art/
|
|
2151
|
+
// duckdb/execution/index/art/node4.hpp
|
|
2062
2152
|
//
|
|
2063
2153
|
//
|
|
2064
2154
|
//===----------------------------------------------------------------------===//
|
|
@@ -2069,14 +2159,14 @@ public:
|
|
|
2069
2159
|
|
|
2070
2160
|
namespace duckdb {
|
|
2071
2161
|
|
|
2072
|
-
class
|
|
2162
|
+
class Node4 : public Node {
|
|
2073
2163
|
public:
|
|
2074
|
-
|
|
2075
|
-
uint8_t
|
|
2076
|
-
SwizzleablePointer children[
|
|
2164
|
+
Node4();
|
|
2165
|
+
uint8_t key[4];
|
|
2166
|
+
SwizzleablePointer children[4];
|
|
2077
2167
|
|
|
2078
2168
|
public:
|
|
2079
|
-
//! Get position of a
|
|
2169
|
+
//! Get position of a byte, returns DConstants::INVALID_INDEX if not exists
|
|
2080
2170
|
idx_t GetChildPos(uint8_t k) override;
|
|
2081
2171
|
//! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
|
|
2082
2172
|
//! if there are no children matching the criteria
|
|
@@ -2085,18 +2175,18 @@ public:
|
|
|
2085
2175
|
idx_t GetMin() override;
|
|
2086
2176
|
//! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position
|
|
2087
2177
|
idx_t GetNextPos(idx_t pos) override;
|
|
2088
|
-
//! Get
|
|
2178
|
+
//! Get Node4 child
|
|
2089
2179
|
Node *GetChild(ART &art, idx_t pos) override;
|
|
2090
2180
|
//! Replace child pointer
|
|
2091
2181
|
void ReplaceChildPointer(idx_t pos, Node *node) override;
|
|
2092
2182
|
|
|
2093
|
-
//! Insert a new child node at key_byte into the
|
|
2183
|
+
//! Insert a new child node at key_byte into the Node4
|
|
2094
2184
|
static void InsertChild(Node *&node, uint8_t key_byte, Node *new_child);
|
|
2095
|
-
//! Erase the child at pos and (if necessary)
|
|
2185
|
+
//! Erase the child at pos and (if necessary) merge with last child
|
|
2096
2186
|
static void EraseChild(Node *&node, int pos, ART &art);
|
|
2097
|
-
//! Merge
|
|
2098
|
-
static
|
|
2099
|
-
//! Returns the size (maximum capacity) of the
|
|
2187
|
+
//! Merge Node4 into l_node
|
|
2188
|
+
static bool Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
|
|
2189
|
+
//! Returns the size (maximum capacity) of the Node4
|
|
2100
2190
|
static idx_t GetSize();
|
|
2101
2191
|
};
|
|
2102
2192
|
} // namespace duckdb
|
|
@@ -2104,7 +2194,7 @@ public:
|
|
|
2104
2194
|
//===----------------------------------------------------------------------===//
|
|
2105
2195
|
// DuckDB
|
|
2106
2196
|
//
|
|
2107
|
-
// duckdb/execution/index/art/
|
|
2197
|
+
// duckdb/execution/index/art/node48.hpp
|
|
2108
2198
|
//
|
|
2109
2199
|
//
|
|
2110
2200
|
//===----------------------------------------------------------------------===//
|
|
@@ -2115,10 +2205,11 @@ public:
|
|
|
2115
2205
|
|
|
2116
2206
|
namespace duckdb {
|
|
2117
2207
|
|
|
2118
|
-
class
|
|
2208
|
+
class Node48 : public Node {
|
|
2119
2209
|
public:
|
|
2120
|
-
explicit
|
|
2121
|
-
|
|
2210
|
+
explicit Node48();
|
|
2211
|
+
uint8_t child_index[256];
|
|
2212
|
+
SwizzleablePointer children[48];
|
|
2122
2213
|
|
|
2123
2214
|
public:
|
|
2124
2215
|
//! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
|
|
@@ -2130,94 +2221,25 @@ public:
|
|
|
2130
2221
|
idx_t GetMin() override;
|
|
2131
2222
|
//! Get the next position in the node, or DConstants::INVALID_INDEX if there is no next position
|
|
2132
2223
|
idx_t GetNextPos(idx_t pos) override;
|
|
2133
|
-
//! Get
|
|
2224
|
+
//! Get Node48 child
|
|
2134
2225
|
Node *GetChild(ART &art, idx_t pos) override;
|
|
2135
2226
|
//! Replace child pointer
|
|
2136
2227
|
void ReplaceChildPointer(idx_t pos, Node *node) override;
|
|
2137
2228
|
|
|
2138
|
-
//! Insert a new child node at key_byte into the
|
|
2229
|
+
//! Insert a new child node at key_byte into the Node48
|
|
2139
2230
|
static void InsertChild(Node *&node, uint8_t key_byte, Node *new_child);
|
|
2140
|
-
//! Erase the child at pos and (if necessary) shrink to
|
|
2231
|
+
//! Erase the child at pos and (if necessary) shrink to Node16
|
|
2141
2232
|
static void EraseChild(Node *&node, int pos, ART &art);
|
|
2142
|
-
//! Merge
|
|
2143
|
-
static
|
|
2144
|
-
//! Returns the size (maximum capacity) of the
|
|
2233
|
+
//! Merge Node48 into l_node
|
|
2234
|
+
static bool Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos);
|
|
2235
|
+
//! Returns the size (maximum capacity) of the Node48
|
|
2145
2236
|
static idx_t GetSize();
|
|
2146
2237
|
};
|
|
2147
2238
|
} // namespace duckdb
|
|
2148
2239
|
|
|
2149
|
-
//===----------------------------------------------------------------------===//
|
|
2150
|
-
// DuckDB
|
|
2151
|
-
//
|
|
2152
|
-
// duckdb/execution/index/art/iterator.hpp
|
|
2153
|
-
//
|
|
2154
|
-
//
|
|
2155
|
-
//===----------------------------------------------------------------------===//
|
|
2156
|
-
|
|
2157
|
-
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
2161
|
-
|
|
2162
|
-
namespace duckdb {
|
|
2163
|
-
|
|
2164
|
-
struct IteratorEntry {
|
|
2165
|
-
IteratorEntry() {
|
|
2166
|
-
}
|
|
2167
|
-
IteratorEntry(Node *node, idx_t pos) : node(node), pos(pos) {
|
|
2168
|
-
}
|
|
2169
|
-
|
|
2170
|
-
Node *node = nullptr;
|
|
2171
|
-
idx_t pos = 0;
|
|
2172
|
-
};
|
|
2173
2240
|
|
|
2174
|
-
//! Keeps track of the current key in the iterator
|
|
2175
|
-
class IteratorCurrentKey {
|
|
2176
|
-
public:
|
|
2177
|
-
//! Push Byte
|
|
2178
|
-
void Push(uint8_t key);
|
|
2179
|
-
//! Pops n elements
|
|
2180
|
-
void Pop(idx_t n);
|
|
2181
|
-
//! Subscript operator
|
|
2182
|
-
uint8_t &operator[](idx_t idx);
|
|
2183
|
-
bool operator>(const Key &k) const;
|
|
2184
|
-
bool operator>=(const Key &k) const;
|
|
2185
|
-
bool operator==(const Key &k) const;
|
|
2186
|
-
|
|
2187
|
-
private:
|
|
2188
|
-
//! The current key position
|
|
2189
|
-
idx_t cur_key_pos = 0;
|
|
2190
|
-
//! The current key of the Leaf Node
|
|
2191
|
-
vector<uint8_t> key;
|
|
2192
|
-
};
|
|
2193
|
-
|
|
2194
|
-
class Iterator {
|
|
2195
|
-
public:
|
|
2196
|
-
//! Current Key
|
|
2197
|
-
IteratorCurrentKey cur_key;
|
|
2198
|
-
//! Pointer to the ART tree we are iterating
|
|
2199
|
-
ART *art = nullptr;
|
|
2200
2241
|
|
|
2201
|
-
//! Scan the tree
|
|
2202
|
-
bool Scan(Key *bound, idx_t max_count, vector<row_t> &result_ids, bool is_inclusive);
|
|
2203
|
-
//! Finds minimum value of the tree
|
|
2204
|
-
void FindMinimum(Node &node);
|
|
2205
|
-
//! Goes to lower bound
|
|
2206
|
-
bool LowerBound(Node *node, Key &key, bool inclusive);
|
|
2207
2242
|
|
|
2208
|
-
private:
|
|
2209
|
-
//! Stack of iterator entries
|
|
2210
|
-
stack<IteratorEntry> nodes;
|
|
2211
|
-
//! Last visited leaf
|
|
2212
|
-
Leaf *last_leaf = nullptr;
|
|
2213
|
-
//! Go to the next node
|
|
2214
|
-
bool Next();
|
|
2215
|
-
//! Push part of the key to cur_key
|
|
2216
|
-
void PushKey(Node *node, uint16_t pos);
|
|
2217
|
-
//! Pop node
|
|
2218
|
-
void PopNode();
|
|
2219
|
-
};
|
|
2220
|
-
} // namespace duckdb
|
|
2221
2243
|
|
|
2222
2244
|
|
|
2223
2245
|
namespace duckdb {
|
|
@@ -2287,33 +2309,37 @@ public:
|
|
|
2287
2309
|
//! Construct ARTs from sorted chunks and merge them.
|
|
2288
2310
|
void ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator &allocator) override;
|
|
2289
2311
|
|
|
2290
|
-
|
|
2312
|
+
//! Searches for entries equal to the key and fetches their row IDs
|
|
2313
|
+
bool SearchEqual(Key &key, idx_t max_count, vector<row_t> &result_ids);
|
|
2291
2314
|
//! Search Equal used for Joins that do not need to fetch data
|
|
2292
|
-
void SearchEqualJoinNoFetch(
|
|
2315
|
+
void SearchEqualJoinNoFetch(Key &key, idx_t &result_size);
|
|
2293
2316
|
//! Serialize the ART
|
|
2294
2317
|
BlockPointer Serialize(duckdb::MetaBlockWriter &writer) override;
|
|
2295
2318
|
|
|
2296
2319
|
//! Merge two ARTs
|
|
2297
|
-
|
|
2320
|
+
bool MergeIndexes(IndexLock &state, Index *other_index) override;
|
|
2321
|
+
//! Generate ART keys for an input chunk
|
|
2322
|
+
static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<Key> &keys);
|
|
2323
|
+
//! Returns the string representation of an ART
|
|
2324
|
+
string ToString() override;
|
|
2298
2325
|
|
|
2299
2326
|
private:
|
|
2300
2327
|
//! Insert a row id into a leaf node
|
|
2301
2328
|
bool InsertToLeaf(Leaf &leaf, row_t row_id);
|
|
2302
2329
|
//! Insert the leaf value into the tree
|
|
2303
|
-
bool Insert(Node *&node,
|
|
2330
|
+
bool Insert(Node *&node, Key &key, idx_t depth, row_t row_id);
|
|
2304
2331
|
|
|
2305
2332
|
//! Erase element from leaf (if leaf has more than one value) or eliminate the leaf itself
|
|
2306
|
-
void Erase(Node *&node, Key &key,
|
|
2333
|
+
void Erase(Node *&node, Key &key, idx_t depth, row_t row_id);
|
|
2307
2334
|
|
|
2308
2335
|
//! Find the node with a matching key, optimistic version
|
|
2309
|
-
|
|
2310
|
-
|
|
2311
|
-
bool SearchGreater(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector<row_t> &result_ids);
|
|
2312
|
-
bool SearchLess(ARTIndexScanState *state, bool inclusive, idx_t max_count, vector<row_t> &result_ids);
|
|
2313
|
-
bool SearchCloseRange(ARTIndexScanState *state, bool left_inclusive, bool right_inclusive, idx_t max_count,
|
|
2314
|
-
vector<row_t> &result_ids);
|
|
2336
|
+
Leaf *Lookup(Node *node, Key &key, idx_t depth);
|
|
2315
2337
|
|
|
2316
|
-
|
|
2338
|
+
bool SearchGreater(ARTIndexScanState *state, Key &key, bool inclusive, idx_t max_count, vector<row_t> &result_ids);
|
|
2339
|
+
bool SearchLess(ARTIndexScanState *state, Key &upper_bound, bool inclusive, idx_t max_count,
|
|
2340
|
+
vector<row_t> &result_ids);
|
|
2341
|
+
bool SearchCloseRange(ARTIndexScanState *state, Key &lower_bound, Key &upper_bound, bool left_inclusive,
|
|
2342
|
+
bool right_inclusive, idx_t max_count, vector<row_t> &result_ids);
|
|
2317
2343
|
|
|
2318
2344
|
void VerifyExistence(DataChunk &chunk, VerifyExistenceType verify_type, string *err_msg_ptr = nullptr);
|
|
2319
2345
|
};
|
|
@@ -33323,7 +33349,7 @@ void RowOperations::SwizzleColumns(const RowLayout &layout, const data_ptr_t bas
|
|
|
33323
33349
|
}
|
|
33324
33350
|
data_ptr_t col_ptr = row_ptr + layout.GetOffsets()[col_idx];
|
|
33325
33351
|
if (physical_type == PhysicalType::VARCHAR) {
|
|
33326
|
-
data_ptr_t string_ptr = col_ptr +
|
|
33352
|
+
data_ptr_t string_ptr = col_ptr + string_t::HEADER_SIZE;
|
|
33327
33353
|
for (idx_t i = 0; i < next; i++) {
|
|
33328
33354
|
if (Load<uint32_t>(col_ptr) > string_t::INLINE_LENGTH) {
|
|
33329
33355
|
// Overwrite the string pointer with the within-row offset (if not inlined)
|
|
@@ -33427,7 +33453,7 @@ void RowOperations::UnswizzlePointers(const RowLayout &layout, const data_ptr_t
|
|
|
33427
33453
|
}
|
|
33428
33454
|
data_ptr_t col_ptr = row_ptr + layout.GetOffsets()[col_idx];
|
|
33429
33455
|
if (physical_type == PhysicalType::VARCHAR) {
|
|
33430
|
-
data_ptr_t string_ptr = col_ptr +
|
|
33456
|
+
data_ptr_t string_ptr = col_ptr + string_t::HEADER_SIZE;
|
|
33431
33457
|
for (idx_t i = 0; i < next; i++) {
|
|
33432
33458
|
if (Load<uint32_t>(col_ptr) > string_t::INLINE_LENGTH) {
|
|
33433
33459
|
// Overwrite the string offset with the pointer (if not inlined)
|
|
@@ -33584,7 +33610,7 @@ static void GatherVarchar(Vector &rows, const SelectionVector &row_sel, Vector &
|
|
|
33584
33610
|
// Not inline, so unswizzle the copied pointer
|
|
33585
33611
|
auto heap_ptr_ptr = row + heap_offset;
|
|
33586
33612
|
auto heap_row_ptr = base_heap_ptr + Load<idx_t>(heap_ptr_ptr);
|
|
33587
|
-
auto string_ptr = data_ptr_t(data + col_idx) +
|
|
33613
|
+
auto string_ptr = data_ptr_t(data + col_idx) + string_t::HEADER_SIZE;
|
|
33588
33614
|
Store<data_ptr_t>(heap_row_ptr + Load<idx_t>(string_ptr), string_ptr);
|
|
33589
33615
|
#ifdef DEBUG
|
|
33590
33616
|
data[col_idx].Verify();
|
|
@@ -35753,14 +35779,14 @@ int Comparators::TemplatedCompareListLoop(data_ptr_t &left_ptr, data_ptr_t &righ
|
|
|
35753
35779
|
|
|
35754
35780
|
//! Replaces the offset stored at the value's slot with a pointer: the idx_t read from
//! the slot is added to heap_ptr and the result is written back to the same slot.
//! For VARCHAR values the slot lies string_t::HEADER_SIZE bytes past data_ptr.
void Comparators::UnswizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type) {
	const bool is_varchar = type.InternalType() == PhysicalType::VARCHAR;
	data_ptr_t slot = is_varchar ? data_ptr + string_t::HEADER_SIZE : data_ptr;
	Store<data_ptr_t>(heap_ptr + Load<idx_t>(slot), slot);
}
|
|
35760
35786
|
|
|
35761
35787
|
//! Replaces the pointer stored at the value's slot with an offset: the pointer read from
//! the slot has heap_ptr subtracted from it and the difference is written back as an idx_t.
//! For VARCHAR values the slot lies string_t::HEADER_SIZE bytes past data_ptr.
void Comparators::SwizzleSingleValue(data_ptr_t data_ptr, const data_ptr_t &heap_ptr, const LogicalType &type) {
	const bool is_varchar = type.InternalType() == PhysicalType::VARCHAR;
	data_ptr_t slot = is_varchar ? data_ptr + string_t::HEADER_SIZE : data_ptr;
	Store<idx_t>(Load<data_ptr_t>(slot) - heap_ptr, slot);
}
|
|
@@ -58262,6 +58288,7 @@ ExpressionExecutorState::ExpressionExecutorState(const string &name) : profiler(
|
|
|
58262
58288
|
|
|
58263
58289
|
|
|
58264
58290
|
|
|
58291
|
+
|
|
58265
58292
|
#include <algorithm>
|
|
58266
58293
|
#include <cstring>
|
|
58267
58294
|
#include <ctgmath>
|
|
@@ -58326,90 +58353,87 @@ unique_ptr<IndexScanState> ART::InitializeScanTwoPredicates(Transaction &transac
|
|
|
58326
58353
|
}
|
|
58327
58354
|
|
|
58328
58355
|
//===--------------------------------------------------------------------===//
|
|
58329
|
-
//
|
|
58356
|
+
// Keys
|
|
58330
58357
|
//===--------------------------------------------------------------------===//
|
|
58358
|
+
|
|
58331
58359
|
template <class T>
|
|
58332
|
-
static void TemplatedGenerateKeys(Vector &input, idx_t count, vector<
|
|
58360
|
+
static void TemplatedGenerateKeys(ArenaAllocator &allocator, Vector &input, idx_t count, vector<Key> &keys) {
|
|
58333
58361
|
UnifiedVectorFormat idata;
|
|
58334
58362
|
input.ToUnifiedFormat(count, idata);
|
|
58335
58363
|
|
|
58364
|
+
D_ASSERT(keys.size() >= count);
|
|
58336
58365
|
auto input_data = (T *)idata.data;
|
|
58337
58366
|
for (idx_t i = 0; i < count; i++) {
|
|
58338
58367
|
auto idx = idata.sel->get_index(i);
|
|
58339
58368
|
if (idata.validity.RowIsValid(idx)) {
|
|
58340
|
-
|
|
58341
|
-
} else {
|
|
58342
|
-
keys.push_back(nullptr);
|
|
58369
|
+
Key::CreateKey<T>(allocator, keys[i], input_data[idx]);
|
|
58343
58370
|
}
|
|
58344
58371
|
}
|
|
58345
58372
|
}
|
|
58346
58373
|
|
|
58347
58374
|
template <class T>
|
|
58348
|
-
static void ConcatenateKeys(Vector &input, idx_t count, vector<
|
|
58375
|
+
static void ConcatenateKeys(ArenaAllocator &allocator, Vector &input, idx_t count, vector<Key> &keys) {
|
|
58349
58376
|
UnifiedVectorFormat idata;
|
|
58350
58377
|
input.ToUnifiedFormat(count, idata);
|
|
58351
58378
|
|
|
58352
58379
|
auto input_data = (T *)idata.data;
|
|
58353
58380
|
for (idx_t i = 0; i < count; i++) {
|
|
58354
58381
|
auto idx = idata.sel->get_index(i);
|
|
58355
|
-
|
|
58356
|
-
|
|
58357
|
-
|
|
58358
|
-
|
|
58359
|
-
|
|
58360
|
-
|
|
58361
|
-
|
|
58362
|
-
|
|
58363
|
-
|
|
58364
|
-
|
|
58365
|
-
memcpy(compound_data.get() + old_key->len, new_key->data.get(), new_key->len);
|
|
58366
|
-
keys[i] = make_unique<Key>(move(compound_data), key_len);
|
|
58382
|
+
|
|
58383
|
+
// key is not NULL (no previous column entry was NULL)
|
|
58384
|
+
if (!keys[i].Empty()) {
|
|
58385
|
+
if (!idata.validity.RowIsValid(idx)) {
|
|
58386
|
+
// this column entry is NULL, set whole key to NULL
|
|
58387
|
+
keys[i] = Key();
|
|
58388
|
+
} else {
|
|
58389
|
+
auto other_key = Key::CreateKey<T>(allocator, input_data[idx]);
|
|
58390
|
+
keys[i].ConcatenateKey(allocator, other_key);
|
|
58391
|
+
}
|
|
58367
58392
|
}
|
|
58368
58393
|
}
|
|
58369
58394
|
}
|
|
58370
58395
|
|
|
58371
|
-
void ART::GenerateKeys(DataChunk &input, vector<
|
|
58372
|
-
keys.reserve(STANDARD_VECTOR_SIZE);
|
|
58396
|
+
void ART::GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<Key> &keys) {
|
|
58373
58397
|
// generate keys for the first input column
|
|
58374
58398
|
switch (input.data[0].GetType().InternalType()) {
|
|
58375
58399
|
case PhysicalType::BOOL:
|
|
58376
|
-
TemplatedGenerateKeys<bool>(input.data[0], input.size(), keys);
|
|
58400
|
+
TemplatedGenerateKeys<bool>(allocator, input.data[0], input.size(), keys);
|
|
58377
58401
|
break;
|
|
58378
58402
|
case PhysicalType::INT8:
|
|
58379
|
-
TemplatedGenerateKeys<int8_t>(input.data[0], input.size(), keys);
|
|
58403
|
+
TemplatedGenerateKeys<int8_t>(allocator, input.data[0], input.size(), keys);
|
|
58380
58404
|
break;
|
|
58381
58405
|
case PhysicalType::INT16:
|
|
58382
|
-
TemplatedGenerateKeys<int16_t>(input.data[0], input.size(), keys);
|
|
58406
|
+
TemplatedGenerateKeys<int16_t>(allocator, input.data[0], input.size(), keys);
|
|
58383
58407
|
break;
|
|
58384
58408
|
case PhysicalType::INT32:
|
|
58385
|
-
TemplatedGenerateKeys<int32_t>(input.data[0], input.size(), keys);
|
|
58409
|
+
TemplatedGenerateKeys<int32_t>(allocator, input.data[0], input.size(), keys);
|
|
58386
58410
|
break;
|
|
58387
58411
|
case PhysicalType::INT64:
|
|
58388
|
-
TemplatedGenerateKeys<int64_t>(input.data[0], input.size(), keys);
|
|
58412
|
+
TemplatedGenerateKeys<int64_t>(allocator, input.data[0], input.size(), keys);
|
|
58389
58413
|
break;
|
|
58390
58414
|
case PhysicalType::INT128:
|
|
58391
|
-
TemplatedGenerateKeys<hugeint_t>(input.data[0], input.size(), keys);
|
|
58415
|
+
TemplatedGenerateKeys<hugeint_t>(allocator, input.data[0], input.size(), keys);
|
|
58392
58416
|
break;
|
|
58393
58417
|
case PhysicalType::UINT8:
|
|
58394
|
-
TemplatedGenerateKeys<uint8_t>(input.data[0], input.size(), keys);
|
|
58418
|
+
TemplatedGenerateKeys<uint8_t>(allocator, input.data[0], input.size(), keys);
|
|
58395
58419
|
break;
|
|
58396
58420
|
case PhysicalType::UINT16:
|
|
58397
|
-
TemplatedGenerateKeys<uint16_t>(input.data[0], input.size(), keys);
|
|
58421
|
+
TemplatedGenerateKeys<uint16_t>(allocator, input.data[0], input.size(), keys);
|
|
58398
58422
|
break;
|
|
58399
58423
|
case PhysicalType::UINT32:
|
|
58400
|
-
TemplatedGenerateKeys<uint32_t>(input.data[0], input.size(), keys);
|
|
58424
|
+
TemplatedGenerateKeys<uint32_t>(allocator, input.data[0], input.size(), keys);
|
|
58401
58425
|
break;
|
|
58402
58426
|
case PhysicalType::UINT64:
|
|
58403
|
-
TemplatedGenerateKeys<uint64_t>(input.data[0], input.size(), keys);
|
|
58427
|
+
TemplatedGenerateKeys<uint64_t>(allocator, input.data[0], input.size(), keys);
|
|
58404
58428
|
break;
|
|
58405
58429
|
case PhysicalType::FLOAT:
|
|
58406
|
-
TemplatedGenerateKeys<float>(input.data[0], input.size(), keys);
|
|
58430
|
+
TemplatedGenerateKeys<float>(allocator, input.data[0], input.size(), keys);
|
|
58407
58431
|
break;
|
|
58408
58432
|
case PhysicalType::DOUBLE:
|
|
58409
|
-
TemplatedGenerateKeys<double>(input.data[0], input.size(), keys);
|
|
58433
|
+
TemplatedGenerateKeys<double>(allocator, input.data[0], input.size(), keys);
|
|
58410
58434
|
break;
|
|
58411
58435
|
case PhysicalType::VARCHAR:
|
|
58412
|
-
TemplatedGenerateKeys<string_t>(input.data[0], input.size(), keys);
|
|
58436
|
+
TemplatedGenerateKeys<string_t>(allocator, input.data[0], input.size(), keys);
|
|
58413
58437
|
break;
|
|
58414
58438
|
default:
|
|
58415
58439
|
throw InternalException("Invalid type for index");
|
|
@@ -58419,43 +58443,43 @@ void ART::GenerateKeys(DataChunk &input, vector<unique_ptr<Key>> &keys) {
|
|
|
58419
58443
|
// for each of the remaining columns, concatenate
|
|
58420
58444
|
switch (input.data[i].GetType().InternalType()) {
|
|
58421
58445
|
case PhysicalType::BOOL:
|
|
58422
|
-
ConcatenateKeys<bool>(input.data[i], input.size(), keys);
|
|
58446
|
+
ConcatenateKeys<bool>(allocator, input.data[i], input.size(), keys);
|
|
58423
58447
|
break;
|
|
58424
58448
|
case PhysicalType::INT8:
|
|
58425
|
-
ConcatenateKeys<int8_t>(input.data[i], input.size(), keys);
|
|
58449
|
+
ConcatenateKeys<int8_t>(allocator, input.data[i], input.size(), keys);
|
|
58426
58450
|
break;
|
|
58427
58451
|
case PhysicalType::INT16:
|
|
58428
|
-
ConcatenateKeys<int16_t>(input.data[i], input.size(), keys);
|
|
58452
|
+
ConcatenateKeys<int16_t>(allocator, input.data[i], input.size(), keys);
|
|
58429
58453
|
break;
|
|
58430
58454
|
case PhysicalType::INT32:
|
|
58431
|
-
ConcatenateKeys<int32_t>(input.data[i], input.size(), keys);
|
|
58455
|
+
ConcatenateKeys<int32_t>(allocator, input.data[i], input.size(), keys);
|
|
58432
58456
|
break;
|
|
58433
58457
|
case PhysicalType::INT64:
|
|
58434
|
-
ConcatenateKeys<int64_t>(input.data[i], input.size(), keys);
|
|
58458
|
+
ConcatenateKeys<int64_t>(allocator, input.data[i], input.size(), keys);
|
|
58435
58459
|
break;
|
|
58436
58460
|
case PhysicalType::INT128:
|
|
58437
|
-
ConcatenateKeys<hugeint_t>(input.data[i], input.size(), keys);
|
|
58461
|
+
ConcatenateKeys<hugeint_t>(allocator, input.data[i], input.size(), keys);
|
|
58438
58462
|
break;
|
|
58439
58463
|
case PhysicalType::UINT8:
|
|
58440
|
-
ConcatenateKeys<uint8_t>(input.data[i], input.size(), keys);
|
|
58464
|
+
ConcatenateKeys<uint8_t>(allocator, input.data[i], input.size(), keys);
|
|
58441
58465
|
break;
|
|
58442
58466
|
case PhysicalType::UINT16:
|
|
58443
|
-
ConcatenateKeys<uint16_t>(input.data[i], input.size(), keys);
|
|
58467
|
+
ConcatenateKeys<uint16_t>(allocator, input.data[i], input.size(), keys);
|
|
58444
58468
|
break;
|
|
58445
58469
|
case PhysicalType::UINT32:
|
|
58446
|
-
ConcatenateKeys<uint32_t>(input.data[i], input.size(), keys);
|
|
58470
|
+
ConcatenateKeys<uint32_t>(allocator, input.data[i], input.size(), keys);
|
|
58447
58471
|
break;
|
|
58448
58472
|
case PhysicalType::UINT64:
|
|
58449
|
-
ConcatenateKeys<uint64_t>(input.data[i], input.size(), keys);
|
|
58473
|
+
ConcatenateKeys<uint64_t>(allocator, input.data[i], input.size(), keys);
|
|
58450
58474
|
break;
|
|
58451
58475
|
case PhysicalType::FLOAT:
|
|
58452
|
-
ConcatenateKeys<float>(input.data[i], input.size(), keys);
|
|
58476
|
+
ConcatenateKeys<float>(allocator, input.data[i], input.size(), keys);
|
|
58453
58477
|
break;
|
|
58454
58478
|
case PhysicalType::DOUBLE:
|
|
58455
|
-
ConcatenateKeys<double>(input.data[i], input.size(), keys);
|
|
58479
|
+
ConcatenateKeys<double>(allocator, input.data[i], input.size(), keys);
|
|
58456
58480
|
break;
|
|
58457
58481
|
case PhysicalType::VARCHAR:
|
|
58458
|
-
ConcatenateKeys<string_t>(input.data[i], input.size(), keys);
|
|
58482
|
+
ConcatenateKeys<string_t>(allocator, input.data[i], input.size(), keys);
|
|
58459
58483
|
break;
|
|
58460
58484
|
default:
|
|
58461
58485
|
throw InternalException("Invalid type for index");
|
|
@@ -58463,22 +58487,26 @@ void ART::GenerateKeys(DataChunk &input, vector<unique_ptr<Key>> &keys) {
|
|
|
58463
58487
|
}
|
|
58464
58488
|
}
|
|
58465
58489
|
|
|
58490
|
+
//===--------------------------------------------------------------------===//
|
|
58491
|
+
// Insert
|
|
58492
|
+
//===--------------------------------------------------------------------===//
|
|
58493
|
+
|
|
58466
58494
|
struct KeySection {
|
|
58467
58495
|
KeySection(idx_t start_p, idx_t end_p, idx_t depth_p, data_t key_byte_p)
|
|
58468
58496
|
: start(start_p), end(end_p), depth(depth_p), key_byte(key_byte_p) {};
|
|
58469
|
-
KeySection(idx_t start_p, idx_t end_p, vector<
|
|
58470
|
-
: start(start_p), end(end_p), depth(key_section.depth + 1), key_byte(keys[end_p]
|
|
58497
|
+
KeySection(idx_t start_p, idx_t end_p, vector<Key> &keys, KeySection &key_section)
|
|
58498
|
+
: start(start_p), end(end_p), depth(key_section.depth + 1), key_byte(keys[end_p].data[key_section.depth]) {};
|
|
58471
58499
|
idx_t start;
|
|
58472
58500
|
idx_t end;
|
|
58473
58501
|
idx_t depth;
|
|
58474
58502
|
data_t key_byte;
|
|
58475
58503
|
};
|
|
58476
58504
|
|
|
58477
|
-
void GetChildSections(vector<KeySection> &child_sections, vector<
|
|
58505
|
+
void GetChildSections(vector<KeySection> &child_sections, vector<Key> &keys, KeySection &key_section) {
|
|
58478
58506
|
|
|
58479
58507
|
idx_t child_start_idx = key_section.start;
|
|
58480
58508
|
for (idx_t i = key_section.start + 1; i <= key_section.end; i++) {
|
|
58481
|
-
if (keys[i - 1]
|
|
58509
|
+
if (keys[i - 1].data[key_section.depth] != keys[i].data[key_section.depth]) {
|
|
58482
58510
|
child_sections.emplace_back(child_start_idx, i - 1, keys, key_section);
|
|
58483
58511
|
child_start_idx = i;
|
|
58484
58512
|
}
|
|
@@ -58486,15 +58514,14 @@ void GetChildSections(vector<KeySection> &child_sections, vector<unique_ptr<Key>
|
|
|
58486
58514
|
child_sections.emplace_back(child_start_idx, key_section.end, keys, key_section);
|
|
58487
58515
|
}
|
|
58488
58516
|
|
|
58489
|
-
void Construct(vector<
|
|
58490
|
-
bool &has_constraint) {
|
|
58517
|
+
void Construct(vector<Key> &keys, row_t *row_ids, Node *&node, KeySection &key_section, bool &has_constraint) {
|
|
58491
58518
|
|
|
58492
58519
|
D_ASSERT(key_section.start < keys.size());
|
|
58493
58520
|
D_ASSERT(key_section.end < keys.size());
|
|
58494
58521
|
D_ASSERT(key_section.start <= key_section.end);
|
|
58495
58522
|
|
|
58496
|
-
auto &start_key =
|
|
58497
|
-
auto &end_key =
|
|
58523
|
+
auto &start_key = keys[key_section.start];
|
|
58524
|
+
auto &end_key = keys[key_section.end];
|
|
58498
58525
|
|
|
58499
58526
|
// increment the depth until we reach a leaf or find a mismatching byte
|
|
58500
58527
|
auto prefix_start = key_section.depth;
|
|
@@ -58542,11 +58569,11 @@ void Construct(vector<unique_ptr<Key>> &keys, row_t *row_ids, Node *&node, KeySe
|
|
|
58542
58569
|
}
|
|
58543
58570
|
}
|
|
58544
58571
|
|
|
58545
|
-
void FindFirstNotNullKey(vector<
|
|
58572
|
+
void FindFirstNotNullKey(vector<Key> &keys, bool &skipped_all_nulls, idx_t &start_idx) {
|
|
58546
58573
|
|
|
58547
58574
|
if (!skipped_all_nulls) {
|
|
58548
58575
|
for (idx_t i = 0; i < keys.size(); i++) {
|
|
58549
|
-
if (keys[i]) {
|
|
58576
|
+
if (!keys[i].Empty()) {
|
|
58550
58577
|
start_idx = i;
|
|
58551
58578
|
skipped_all_nulls = true;
|
|
58552
58579
|
return;
|
|
@@ -58560,9 +58587,13 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
|
|
|
58560
58587
|
auto payload_types = logical_types;
|
|
58561
58588
|
payload_types.emplace_back(LogicalType::ROW_TYPE);
|
|
58562
58589
|
|
|
58590
|
+
ArenaAllocator arena_allocator(allocator);
|
|
58591
|
+
vector<Key> keys(STANDARD_VECTOR_SIZE);
|
|
58592
|
+
|
|
58563
58593
|
auto skipped_all_nulls = false;
|
|
58564
58594
|
auto temp_art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
|
|
58565
58595
|
this->constraint_type, this->db);
|
|
58596
|
+
|
|
58566
58597
|
for (;;) {
|
|
58567
58598
|
DataChunk ordered_chunk;
|
|
58568
58599
|
ordered_chunk.Initialize(allocator, payload_types);
|
|
@@ -58581,8 +58612,8 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
|
|
|
58581
58612
|
D_ASSERT(logical_types[0] == ordered_chunk.data[0].GetType());
|
|
58582
58613
|
|
|
58583
58614
|
// generate the keys for the given input
|
|
58584
|
-
|
|
58585
|
-
GenerateKeys(ordered_chunk, keys);
|
|
58615
|
+
arena_allocator.Reset();
|
|
58616
|
+
GenerateKeys(arena_allocator, ordered_chunk, keys);
|
|
58586
58617
|
|
|
58587
58618
|
// we order NULLS FIRST, so we might have to skip nulls at the start of our sorted data
|
|
58588
58619
|
idx_t start_idx = 0;
|
|
@@ -58608,16 +58639,21 @@ void ART::ConstructAndMerge(IndexLock &lock, PayloadScanner &scanner, Allocator
|
|
|
58608
58639
|
auto art = make_unique<ART>(this->column_ids, this->table_io_manager, this->unbound_expressions,
|
|
58609
58640
|
this->constraint_type, this->db);
|
|
58610
58641
|
auto key_section = KeySection(start_idx, ordered_chunk.size() - 1, 0, 0);
|
|
58611
|
-
auto has_constraint =
|
|
58642
|
+
auto has_constraint = IsUnique();
|
|
58612
58643
|
Construct(keys, row_ids, art->tree, key_section, has_constraint);
|
|
58613
58644
|
|
|
58614
58645
|
// merge art into temp_art
|
|
58615
|
-
|
|
58646
|
+
if (!temp_art->MergeIndexes(lock, art.get())) {
|
|
58647
|
+
throw ConstraintException("Data contains duplicates on indexed column(s)");
|
|
58648
|
+
}
|
|
58616
58649
|
}
|
|
58617
58650
|
|
|
58618
58651
|
// NOTE: currently this code is only used for index creation, so we can assume that there are no
|
|
58619
|
-
// duplicate violations between the existing index and the new data
|
|
58620
|
-
|
|
58652
|
+
// duplicate violations between the existing index and the new data,
|
|
58653
|
+
// so we do not need to revert any changes
|
|
58654
|
+
if (!this->MergeIndexes(lock, temp_art.get())) {
|
|
58655
|
+
throw ConstraintException("Data contains duplicates on indexed column(s)");
|
|
58656
|
+
}
|
|
58621
58657
|
}
|
|
58622
58658
|
|
|
58623
58659
|
bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
|
|
@@ -58625,39 +58661,35 @@ bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
|
|
|
58625
58661
|
D_ASSERT(logical_types[0] == input.data[0].GetType());
|
|
58626
58662
|
|
|
58627
58663
|
// generate the keys for the given input
|
|
58628
|
-
|
|
58629
|
-
|
|
58664
|
+
ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
|
|
58665
|
+
vector<Key> keys(input.size());
|
|
58666
|
+
GenerateKeys(arena_allocator, input, keys);
|
|
58630
58667
|
|
|
58631
58668
|
// now insert the elements into the index
|
|
58632
58669
|
row_ids.Flatten(input.size());
|
|
58633
58670
|
auto row_identifiers = FlatVector::GetData<row_t>(row_ids);
|
|
58634
58671
|
idx_t failed_index = DConstants::INVALID_INDEX;
|
|
58635
58672
|
for (idx_t i = 0; i < input.size(); i++) {
|
|
58636
|
-
if (
|
|
58673
|
+
if (keys[i].Empty()) {
|
|
58637
58674
|
continue;
|
|
58638
58675
|
}
|
|
58639
58676
|
|
|
58640
58677
|
row_t row_id = row_identifiers[i];
|
|
58641
|
-
if (!Insert(tree,
|
|
58678
|
+
if (!Insert(tree, keys[i], 0, row_id)) {
|
|
58642
58679
|
// failed to insert because of constraint violation
|
|
58643
58680
|
failed_index = i;
|
|
58644
58681
|
break;
|
|
58645
58682
|
}
|
|
58646
58683
|
}
|
|
58647
58684
|
if (failed_index != DConstants::INVALID_INDEX) {
|
|
58648
|
-
// failed to insert because of constraint violation: remove previously inserted entries
|
|
58649
|
-
// generate keys again
|
|
58650
|
-
keys.clear();
|
|
58651
|
-
GenerateKeys(input, keys);
|
|
58652
|
-
unique_ptr<Key> key;
|
|
58653
58685
|
|
|
58654
|
-
//
|
|
58686
|
+
// failed to insert because of constraint violation: remove previously inserted entries
|
|
58655
58687
|
for (idx_t i = 0; i < failed_index; i++) {
|
|
58656
|
-
if (
|
|
58688
|
+
if (keys[i].Empty()) {
|
|
58657
58689
|
continue;
|
|
58658
58690
|
}
|
|
58659
58691
|
row_t row_id = row_identifiers[i];
|
|
58660
|
-
Erase(tree,
|
|
58692
|
+
Erase(tree, keys[i], 0, row_id);
|
|
58661
58693
|
}
|
|
58662
58694
|
return false;
|
|
58663
58695
|
}
|
|
@@ -58700,11 +58732,11 @@ bool ART::InsertToLeaf(Leaf &leaf, row_t row_id) {
|
|
|
58700
58732
|
return true;
|
|
58701
58733
|
}
|
|
58702
58734
|
|
|
58703
|
-
bool ART::Insert(Node *&node,
|
|
58704
|
-
|
|
58735
|
+
bool ART::Insert(Node *&node, Key &key, idx_t depth, row_t row_id) {
|
|
58736
|
+
|
|
58705
58737
|
if (!node) {
|
|
58706
58738
|
// node is currently empty, create a leaf here with the key
|
|
58707
|
-
node = new Leaf(
|
|
58739
|
+
node = new Leaf(key, depth, row_id);
|
|
58708
58740
|
return true;
|
|
58709
58741
|
}
|
|
58710
58742
|
|
|
@@ -58731,7 +58763,7 @@ bool ART::Insert(Node *&node, unique_ptr<Key> value, unsigned depth, row_t row_i
|
|
|
58731
58763
|
new_node->prefix = Prefix(key, depth, new_prefix_length);
|
|
58732
58764
|
auto key_byte = node->prefix.Reduce(new_prefix_length);
|
|
58733
58765
|
Node4::InsertChild(new_node, key_byte, node);
|
|
58734
|
-
Node *leaf_node = new Leaf(
|
|
58766
|
+
Node *leaf_node = new Leaf(key, depth + new_prefix_length + 1, row_id);
|
|
58735
58767
|
Node4::InsertChild(new_node, key[depth + new_prefix_length], leaf_node);
|
|
58736
58768
|
node = new_node;
|
|
58737
58769
|
return true;
|
|
@@ -58748,7 +58780,7 @@ bool ART::Insert(Node *&node, unique_ptr<Key> value, unsigned depth, row_t row_i
|
|
|
58748
58780
|
auto key_byte = node->prefix.Reduce(mismatch_pos);
|
|
58749
58781
|
Node4::InsertChild(new_node, key_byte, node);
|
|
58750
58782
|
|
|
58751
|
-
Node *leaf_node = new Leaf(
|
|
58783
|
+
Node *leaf_node = new Leaf(key, depth + mismatch_pos + 1, row_id);
|
|
58752
58784
|
Node4::InsertChild(new_node, key[depth + mismatch_pos], leaf_node);
|
|
58753
58785
|
node = new_node;
|
|
58754
58786
|
return true;
|
|
@@ -58761,11 +58793,11 @@ bool ART::Insert(Node *&node, unique_ptr<Key> value, unsigned depth, row_t row_i
|
|
|
58761
58793
|
idx_t pos = node->GetChildPos(key[depth]);
|
|
58762
58794
|
if (pos != DConstants::INVALID_INDEX) {
|
|
58763
58795
|
auto child = node->GetChild(*this, pos);
|
|
58764
|
-
bool insertion_result = Insert(child,
|
|
58796
|
+
bool insertion_result = Insert(child, key, depth + 1, row_id);
|
|
58765
58797
|
node->ReplaceChildPointer(pos, child);
|
|
58766
58798
|
return insertion_result;
|
|
58767
58799
|
}
|
|
58768
|
-
Node *new_node = new Leaf(
|
|
58800
|
+
Node *new_node = new Leaf(key, depth + 1, row_id);
|
|
58769
58801
|
Node::InsertChild(node, key[depth], new_node);
|
|
58770
58802
|
return true;
|
|
58771
58803
|
}
|
|
@@ -58781,20 +58813,21 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
|
|
58781
58813
|
ExecuteExpressions(input, expression);
|
|
58782
58814
|
|
|
58783
58815
|
// then generate the keys for the given input
|
|
58784
|
-
|
|
58785
|
-
|
|
58816
|
+
ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
|
|
58817
|
+
vector<Key> keys(expression.size());
|
|
58818
|
+
GenerateKeys(arena_allocator, expression, keys);
|
|
58786
58819
|
|
|
58787
58820
|
// now erase the elements from the database
|
|
58788
58821
|
row_ids.Flatten(input.size());
|
|
58789
58822
|
auto row_identifiers = FlatVector::GetData<row_t>(row_ids);
|
|
58790
58823
|
|
|
58791
58824
|
for (idx_t i = 0; i < input.size(); i++) {
|
|
58792
|
-
if (
|
|
58825
|
+
if (keys[i].Empty()) {
|
|
58793
58826
|
continue;
|
|
58794
58827
|
}
|
|
58795
|
-
Erase(tree,
|
|
58828
|
+
Erase(tree, keys[i], 0, row_identifiers[i]);
|
|
58796
58829
|
#ifdef DEBUG
|
|
58797
|
-
auto node = Lookup(tree,
|
|
58830
|
+
auto node = Lookup(tree, keys[i], 0);
|
|
58798
58831
|
if (node) {
|
|
58799
58832
|
auto leaf = static_cast<Leaf *>(node);
|
|
58800
58833
|
for (idx_t k = 0; k < leaf->count; k++) {
|
|
@@ -58805,7 +58838,7 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
|
|
58805
58838
|
}
|
|
58806
58839
|
}
|
|
58807
58840
|
|
|
58808
|
-
void ART::Erase(Node *&node, Key &key,
|
|
58841
|
+
void ART::Erase(Node *&node, Key &key, idx_t depth, row_t row_id) {
|
|
58809
58842
|
if (!node) {
|
|
58810
58843
|
return;
|
|
58811
58844
|
}
|
|
@@ -58853,43 +58886,43 @@ void ART::Erase(Node *&node, Key &key, unsigned depth, row_t row_id) {
|
|
|
58853
58886
|
//===--------------------------------------------------------------------===//
|
|
58854
58887
|
// Point Query
|
|
58855
58888
|
//===--------------------------------------------------------------------===//
|
|
58856
|
-
static
|
|
58889
|
+
static Key CreateKey(ArenaAllocator &allocator, PhysicalType type, Value &value) {
|
|
58857
58890
|
D_ASSERT(type == value.type().InternalType());
|
|
58858
58891
|
switch (type) {
|
|
58859
58892
|
case PhysicalType::BOOL:
|
|
58860
|
-
return Key::CreateKey<bool>(value);
|
|
58893
|
+
return Key::CreateKey<bool>(allocator, value);
|
|
58861
58894
|
case PhysicalType::INT8:
|
|
58862
|
-
return Key::CreateKey<int8_t>(value);
|
|
58895
|
+
return Key::CreateKey<int8_t>(allocator, value);
|
|
58863
58896
|
case PhysicalType::INT16:
|
|
58864
|
-
return Key::CreateKey<int16_t>(value);
|
|
58897
|
+
return Key::CreateKey<int16_t>(allocator, value);
|
|
58865
58898
|
case PhysicalType::INT32:
|
|
58866
|
-
return Key::CreateKey<int32_t>(value);
|
|
58899
|
+
return Key::CreateKey<int32_t>(allocator, value);
|
|
58867
58900
|
case PhysicalType::INT64:
|
|
58868
|
-
return Key::CreateKey<int64_t>(value);
|
|
58901
|
+
return Key::CreateKey<int64_t>(allocator, value);
|
|
58869
58902
|
case PhysicalType::UINT8:
|
|
58870
|
-
return Key::CreateKey<uint8_t>(value);
|
|
58903
|
+
return Key::CreateKey<uint8_t>(allocator, value);
|
|
58871
58904
|
case PhysicalType::UINT16:
|
|
58872
|
-
return Key::CreateKey<uint16_t>(value);
|
|
58905
|
+
return Key::CreateKey<uint16_t>(allocator, value);
|
|
58873
58906
|
case PhysicalType::UINT32:
|
|
58874
|
-
return Key::CreateKey<uint32_t>(value);
|
|
58907
|
+
return Key::CreateKey<uint32_t>(allocator, value);
|
|
58875
58908
|
case PhysicalType::UINT64:
|
|
58876
|
-
return Key::CreateKey<uint64_t>(value);
|
|
58909
|
+
return Key::CreateKey<uint64_t>(allocator, value);
|
|
58877
58910
|
case PhysicalType::INT128:
|
|
58878
|
-
return Key::CreateKey<hugeint_t>(value);
|
|
58911
|
+
return Key::CreateKey<hugeint_t>(allocator, value);
|
|
58879
58912
|
case PhysicalType::FLOAT:
|
|
58880
|
-
return Key::CreateKey<float>(value);
|
|
58913
|
+
return Key::CreateKey<float>(allocator, value);
|
|
58881
58914
|
case PhysicalType::DOUBLE:
|
|
58882
|
-
return Key::CreateKey<double>(value);
|
|
58915
|
+
return Key::CreateKey<double>(allocator, value);
|
|
58883
58916
|
case PhysicalType::VARCHAR:
|
|
58884
|
-
return Key::CreateKey<string_t>(value);
|
|
58917
|
+
return Key::CreateKey<string_t>(allocator, value);
|
|
58885
58918
|
default:
|
|
58886
58919
|
throw InternalException("Invalid type for index");
|
|
58887
58920
|
}
|
|
58888
58921
|
}
|
|
58889
58922
|
|
|
58890
|
-
bool ART::SearchEqual(
|
|
58891
|
-
|
|
58892
|
-
auto leaf = static_cast<Leaf *>(Lookup(tree,
|
|
58923
|
+
bool ART::SearchEqual(Key &key, idx_t max_count, vector<row_t> &result_ids) {
|
|
58924
|
+
|
|
58925
|
+
auto leaf = static_cast<Leaf *>(Lookup(tree, key, 0));
|
|
58893
58926
|
if (!leaf) {
|
|
58894
58927
|
return true;
|
|
58895
58928
|
}
|
|
@@ -58903,17 +58936,17 @@ bool ART::SearchEqual(ARTIndexScanState *state, idx_t max_count, vector<row_t> &
|
|
|
58903
58936
|
return true;
|
|
58904
58937
|
}
|
|
58905
58938
|
|
|
58906
|
-
void ART::SearchEqualJoinNoFetch(
|
|
58907
|
-
|
|
58908
|
-
|
|
58909
|
-
auto leaf =
|
|
58939
|
+
void ART::SearchEqualJoinNoFetch(Key &key, idx_t &result_size) {
|
|
58940
|
+
|
|
58941
|
+
// we need to look for a leaf
|
|
58942
|
+
auto leaf = Lookup(tree, key, 0);
|
|
58910
58943
|
if (!leaf) {
|
|
58911
58944
|
return;
|
|
58912
58945
|
}
|
|
58913
58946
|
result_size = leaf->count;
|
|
58914
58947
|
}
|
|
58915
58948
|
|
|
58916
|
-
|
|
58949
|
+
Leaf *ART::Lookup(Node *node, Key &key, idx_t depth) {
|
|
58917
58950
|
while (node) {
|
|
58918
58951
|
if (node->type == NodeType::NLeaf) {
|
|
58919
58952
|
auto leaf = (Leaf *)node;
|
|
@@ -58924,7 +58957,7 @@ Node *ART::Lookup(Node *node, Key &key, unsigned depth) {
|
|
|
58924
58957
|
return nullptr;
|
|
58925
58958
|
}
|
|
58926
58959
|
}
|
|
58927
|
-
return node;
|
|
58960
|
+
return (Leaf *)node;
|
|
58928
58961
|
}
|
|
58929
58962
|
if (node->prefix.Size()) {
|
|
58930
58963
|
for (idx_t pos = 0; pos < node->prefix.Size(); pos++) {
|
|
@@ -58950,112 +58983,127 @@ Node *ART::Lookup(Node *node, Key &key, unsigned depth) {
|
|
|
58950
58983
|
// Returns: True (If found leaf >= key)
|
|
58951
58984
|
// False (Otherwise)
|
|
58952
58985
|
//===--------------------------------------------------------------------===//
|
|
58953
|
-
bool ART::SearchGreater(ARTIndexScanState *state, bool inclusive, idx_t max_count,
|
|
58986
|
+
bool ART::SearchGreater(ARTIndexScanState *state, Key &key, bool inclusive, idx_t max_count,
|
|
58987
|
+
vector<row_t> &result_ids) {
|
|
58988
|
+
|
|
58954
58989
|
Iterator *it = &state->iterator;
|
|
58955
|
-
auto key = CreateKey(*this, types[0], state->values[0]);
|
|
58956
58990
|
|
|
58957
58991
|
// greater than scan: first set the iterator to the node at which we will start our scan by finding the lowest node
|
|
58958
58992
|
// that satisfies our requirement
|
|
58959
58993
|
if (!it->art) {
|
|
58960
58994
|
it->art = this;
|
|
58961
|
-
bool found = it->LowerBound(tree,
|
|
58995
|
+
bool found = it->LowerBound(tree, key, inclusive);
|
|
58962
58996
|
if (!found) {
|
|
58963
58997
|
return true;
|
|
58964
58998
|
}
|
|
58965
58999
|
}
|
|
58966
59000
|
// after that we continue the scan; we don't need to check the bounds as any value following this value is
|
|
58967
59001
|
// automatically bigger and hence satisfies our predicate
|
|
58968
|
-
|
|
59002
|
+
Key empty_key = Key();
|
|
59003
|
+
return it->Scan(empty_key, max_count, result_ids, false);
|
|
58969
59004
|
}
|
|
58970
59005
|
|
|
58971
59006
|
//===--------------------------------------------------------------------===//
|
|
58972
59007
|
// Less Than
|
|
58973
59008
|
//===--------------------------------------------------------------------===//
|
|
58974
|
-
bool ART::SearchLess(ARTIndexScanState *state, bool inclusive, idx_t max_count,
|
|
59009
|
+
bool ART::SearchLess(ARTIndexScanState *state, Key &upper_bound, bool inclusive, idx_t max_count,
|
|
59010
|
+
vector<row_t> &result_ids) {
|
|
59011
|
+
|
|
58975
59012
|
if (!tree) {
|
|
58976
59013
|
return true;
|
|
58977
59014
|
}
|
|
58978
59015
|
|
|
58979
59016
|
Iterator *it = &state->iterator;
|
|
58980
|
-
auto upper_bound = CreateKey(*this, types[0], state->values[0]);
|
|
58981
59017
|
|
|
58982
59018
|
if (!it->art) {
|
|
58983
59019
|
it->art = this;
|
|
58984
59020
|
// first find the minimum value in the ART: we start scanning from this value
|
|
58985
59021
|
it->FindMinimum(*tree);
|
|
58986
59022
|
// early out min value higher than upper bound query
|
|
58987
|
-
if (it->cur_key >
|
|
59023
|
+
if (it->cur_key > upper_bound) {
|
|
58988
59024
|
return true;
|
|
58989
59025
|
}
|
|
58990
59026
|
}
|
|
58991
59027
|
// now continue the scan until we reach the upper bound
|
|
58992
|
-
return it->Scan(upper_bound
|
|
59028
|
+
return it->Scan(upper_bound, max_count, result_ids, inclusive);
|
|
58993
59029
|
}
|
|
58994
59030
|
|
|
58995
59031
|
//===--------------------------------------------------------------------===//
|
|
58996
59032
|
// Closed Range Query
|
|
58997
59033
|
//===--------------------------------------------------------------------===//
|
|
58998
|
-
bool ART::SearchCloseRange(ARTIndexScanState *state,
|
|
58999
|
-
vector<row_t> &result_ids) {
|
|
59000
|
-
|
|
59001
|
-
auto upper_bound = CreateKey(*this, types[0], state->values[1]);
|
|
59034
|
+
bool ART::SearchCloseRange(ARTIndexScanState *state, Key &lower_bound, Key &upper_bound, bool left_inclusive,
|
|
59035
|
+
bool right_inclusive, idx_t max_count, vector<row_t> &result_ids) {
|
|
59036
|
+
|
|
59002
59037
|
Iterator *it = &state->iterator;
|
|
59038
|
+
|
|
59003
59039
|
// first find the first node that satisfies the left predicate
|
|
59004
59040
|
if (!it->art) {
|
|
59005
59041
|
it->art = this;
|
|
59006
|
-
bool found = it->LowerBound(tree,
|
|
59042
|
+
bool found = it->LowerBound(tree, lower_bound, left_inclusive);
|
|
59007
59043
|
if (!found) {
|
|
59008
59044
|
return true;
|
|
59009
59045
|
}
|
|
59010
59046
|
}
|
|
59011
59047
|
// now continue the scan until we reach the upper bound
|
|
59012
|
-
return it->Scan(upper_bound
|
|
59048
|
+
return it->Scan(upper_bound, max_count, result_ids, right_inclusive);
|
|
59013
59049
|
}
|
|
59014
59050
|
|
|
59015
59051
|
bool ART::Scan(Transaction &transaction, DataTable &table, IndexScanState &table_state, idx_t max_count,
|
|
59016
59052
|
vector<row_t> &result_ids) {
|
|
59053
|
+
|
|
59017
59054
|
auto state = (ARTIndexScanState *)&table_state;
|
|
59055
|
+
vector<row_t> row_ids;
|
|
59056
|
+
bool success;
|
|
59018
59057
|
|
|
59058
|
+
// FIXME: the key directly owning the data for a single key might be more efficient
|
|
59019
59059
|
D_ASSERT(state->values[0].type().InternalType() == types[0]);
|
|
59060
|
+
ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
|
|
59061
|
+
auto key = CreateKey(arena_allocator, types[0], state->values[0]);
|
|
59020
59062
|
|
|
59021
|
-
vector<row_t> row_ids;
|
|
59022
|
-
bool success;
|
|
59023
59063
|
if (state->values[1].IsNull()) {
|
|
59024
|
-
|
|
59064
|
+
|
|
59025
59065
|
// single predicate
|
|
59066
|
+
lock_guard<mutex> l(lock);
|
|
59026
59067
|
switch (state->expressions[0]) {
|
|
59027
59068
|
case ExpressionType::COMPARE_EQUAL:
|
|
59028
|
-
success = SearchEqual(
|
|
59069
|
+
success = SearchEqual(key, max_count, row_ids);
|
|
59029
59070
|
break;
|
|
59030
59071
|
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
|
|
59031
|
-
success = SearchGreater(state, true, max_count, row_ids);
|
|
59072
|
+
success = SearchGreater(state, key, true, max_count, row_ids);
|
|
59032
59073
|
break;
|
|
59033
59074
|
case ExpressionType::COMPARE_GREATERTHAN:
|
|
59034
|
-
success = SearchGreater(state, false, max_count, row_ids);
|
|
59075
|
+
success = SearchGreater(state, key, false, max_count, row_ids);
|
|
59035
59076
|
break;
|
|
59036
59077
|
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
|
|
59037
|
-
success = SearchLess(state, true, max_count, row_ids);
|
|
59078
|
+
success = SearchLess(state, key, true, max_count, row_ids);
|
|
59038
59079
|
break;
|
|
59039
59080
|
case ExpressionType::COMPARE_LESSTHAN:
|
|
59040
|
-
success = SearchLess(state, false, max_count, row_ids);
|
|
59081
|
+
success = SearchLess(state, key, false, max_count, row_ids);
|
|
59041
59082
|
break;
|
|
59042
59083
|
default:
|
|
59043
59084
|
throw InternalException("Operation not implemented");
|
|
59044
59085
|
}
|
|
59086
|
+
|
|
59045
59087
|
} else {
|
|
59046
|
-
|
|
59088
|
+
|
|
59047
59089
|
// two predicates
|
|
59090
|
+
lock_guard<mutex> l(lock);
|
|
59091
|
+
|
|
59048
59092
|
D_ASSERT(state->values[1].type().InternalType() == types[0]);
|
|
59093
|
+
auto upper_bound = CreateKey(arena_allocator, types[0], state->values[1]);
|
|
59094
|
+
|
|
59049
59095
|
bool left_inclusive = state->expressions[0] == ExpressionType ::COMPARE_GREATERTHANOREQUALTO;
|
|
59050
59096
|
bool right_inclusive = state->expressions[1] == ExpressionType ::COMPARE_LESSTHANOREQUALTO;
|
|
59051
|
-
success = SearchCloseRange(state, left_inclusive, right_inclusive, max_count, row_ids);
|
|
59097
|
+
success = SearchCloseRange(state, key, upper_bound, left_inclusive, right_inclusive, max_count, row_ids);
|
|
59052
59098
|
}
|
|
59099
|
+
|
|
59053
59100
|
if (!success) {
|
|
59054
59101
|
return false;
|
|
59055
59102
|
}
|
|
59056
59103
|
if (row_ids.empty()) {
|
|
59057
59104
|
return true;
|
|
59058
59105
|
}
|
|
59106
|
+
|
|
59059
59107
|
// sort the row ids
|
|
59060
59108
|
sort(row_ids.begin(), row_ids.end());
|
|
59061
59109
|
// duplicate eliminate the row ids and append them to the row ids of the state
|
|
@@ -59084,14 +59132,15 @@ void ART::VerifyExistence(DataChunk &chunk, VerifyExistenceType verify_type, str
|
|
|
59084
59132
|
ExecuteExpressions(chunk, expression_chunk);
|
|
59085
59133
|
|
|
59086
59134
|
// generate the keys for the given input
|
|
59087
|
-
|
|
59088
|
-
|
|
59135
|
+
ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
|
|
59136
|
+
vector<Key> keys(expression_chunk.size());
|
|
59137
|
+
GenerateKeys(arena_allocator, expression_chunk, keys);
|
|
59089
59138
|
|
|
59090
59139
|
for (idx_t i = 0; i < chunk.size(); i++) {
|
|
59091
|
-
if (
|
|
59140
|
+
if (keys[i].Empty()) {
|
|
59092
59141
|
continue;
|
|
59093
59142
|
}
|
|
59094
|
-
Node *node_ptr = Lookup(tree,
|
|
59143
|
+
Node *node_ptr = Lookup(tree, keys[i], 0);
|
|
59095
59144
|
bool throw_exception =
|
|
59096
59145
|
verify_type == VerifyExistenceType::APPEND_FK ? node_ptr == nullptr : node_ptr != nullptr;
|
|
59097
59146
|
if (!throw_exception) {
|
|
@@ -59148,15 +59197,24 @@ BlockPointer ART::Serialize(duckdb::MetaBlockWriter &writer) {
|
|
|
59148
59197
|
//===--------------------------------------------------------------------===//
|
|
59149
59198
|
// Merge ARTs
|
|
59150
59199
|
//===--------------------------------------------------------------------===//
|
|
59151
|
-
|
|
59200
|
+
bool ART::MergeIndexes(IndexLock &state, Index *other_index) {
|
|
59152
59201
|
|
|
59153
|
-
|
|
59154
|
-
|
|
59155
|
-
|
|
59156
|
-
|
|
59202
|
+
auto other_art = (ART *)other_index;
|
|
59203
|
+
|
|
59204
|
+
if (!this->tree) {
|
|
59205
|
+
this->tree = other_art->tree;
|
|
59206
|
+
other_art->tree = nullptr;
|
|
59207
|
+
return true;
|
|
59157
59208
|
}
|
|
59158
59209
|
|
|
59159
|
-
Node::MergeARTs(
|
|
59210
|
+
return Node::MergeARTs(this, other_art);
|
|
59211
|
+
}
|
|
59212
|
+
|
|
59213
|
+
string ART::ToString() {
|
|
59214
|
+
if (tree) {
|
|
59215
|
+
return tree->ToString(*this);
|
|
59216
|
+
}
|
|
59217
|
+
return "[empty]";
|
|
59160
59218
|
}
|
|
59161
59219
|
|
|
59162
59220
|
} // namespace duckdb
|
|
@@ -59166,25 +59224,41 @@ void ART::Merge(ART *l_art, ART *r_art) {
|
|
|
59166
59224
|
|
|
59167
59225
|
namespace duckdb {
|
|
59168
59226
|
|
|
59169
|
-
Key::Key(
|
|
59227
|
+
Key::Key() : len(0) {
|
|
59170
59228
|
}
|
|
59171
59229
|
|
|
59172
|
-
Key::Key(idx_t len) : len(len) {
|
|
59173
|
-
|
|
59230
|
+
Key::Key(data_ptr_t data, idx_t len) : len(len), data(data) {
|
|
59231
|
+
}
|
|
59232
|
+
|
|
59233
|
+
Key::Key(ArenaAllocator &allocator, idx_t len) : len(len) {
|
|
59234
|
+
data = allocator.Allocate(len);
|
|
59174
59235
|
}
|
|
59175
59236
|
|
|
59176
59237
|
template <>
|
|
59177
|
-
|
|
59238
|
+
Key Key::CreateKey(ArenaAllocator &allocator, string_t value) {
|
|
59178
59239
|
idx_t len = value.GetSize() + 1;
|
|
59179
|
-
auto data =
|
|
59180
|
-
memcpy(data
|
|
59240
|
+
auto data = allocator.Allocate(len);
|
|
59241
|
+
memcpy(data, value.GetDataUnsafe(), len - 1);
|
|
59181
59242
|
data[len - 1] = '\0';
|
|
59182
|
-
return
|
|
59243
|
+
return Key(data, len);
|
|
59183
59244
|
}
|
|
59184
59245
|
|
|
59185
59246
|
template <>
|
|
59186
|
-
|
|
59187
|
-
return Key::CreateKey(string_t(value, strlen(value)));
|
|
59247
|
+
Key Key::CreateKey(ArenaAllocator &allocator, const char *value) {
|
|
59248
|
+
return Key::CreateKey(allocator, string_t(value, strlen(value)));
|
|
59249
|
+
}
|
|
59250
|
+
|
|
59251
|
+
template <>
|
|
59252
|
+
void Key::CreateKey(ArenaAllocator &allocator, Key &key, string_t value) {
|
|
59253
|
+
key.len = value.GetSize() + 1;
|
|
59254
|
+
key.data = allocator.Allocate(key.len);
|
|
59255
|
+
memcpy(key.data, value.GetDataUnsafe(), key.len - 1);
|
|
59256
|
+
key.data[key.len - 1] = '\0';
|
|
59257
|
+
}
|
|
59258
|
+
|
|
59259
|
+
template <>
|
|
59260
|
+
void Key::CreateKey(ArenaAllocator &allocator, Key &key, const char *value) {
|
|
59261
|
+
Key::CreateKey(allocator, key, string_t(value, strlen(value)));
|
|
59188
59262
|
}
|
|
59189
59263
|
|
|
59190
59264
|
bool Key::operator>(const Key &k) const {
|
|
@@ -59235,6 +59309,19 @@ bool Key::operator==(const Key &k) const {
|
|
|
59235
59309
|
bool Key::ByteMatches(Key &other, idx_t &depth) {
|
|
59236
59310
|
return data[depth] == other[depth];
|
|
59237
59311
|
}
|
|
59312
|
+
|
|
59313
|
+
bool Key::Empty() {
|
|
59314
|
+
return len == 0;
|
|
59315
|
+
}
|
|
59316
|
+
|
|
59317
|
+
void Key::ConcatenateKey(ArenaAllocator &allocator, Key &other_key) {
|
|
59318
|
+
|
|
59319
|
+
auto compound_data = allocator.Allocate(len + other_key.len);
|
|
59320
|
+
memcpy(compound_data, data, len);
|
|
59321
|
+
memcpy(compound_data + len, other_key.data, other_key.len);
|
|
59322
|
+
len += other_key.len;
|
|
59323
|
+
data = compound_data;
|
|
59324
|
+
}
|
|
59238
59325
|
} // namespace duckdb
|
|
59239
59326
|
|
|
59240
59327
|
|
|
@@ -59358,16 +59445,16 @@ void Iterator::PushKey(Node *cur_node, uint16_t pos) {
|
|
|
59358
59445
|
}
|
|
59359
59446
|
}
|
|
59360
59447
|
|
|
59361
|
-
bool Iterator::Scan(Key
|
|
59448
|
+
bool Iterator::Scan(Key &bound, idx_t max_count, vector<row_t> &result_ids, bool is_inclusive) {
|
|
59362
59449
|
bool has_next;
|
|
59363
59450
|
do {
|
|
59364
|
-
if (bound) {
|
|
59451
|
+
if (!bound.Empty()) {
|
|
59365
59452
|
if (is_inclusive) {
|
|
59366
|
-
if (cur_key >
|
|
59453
|
+
if (cur_key > bound) {
|
|
59367
59454
|
break;
|
|
59368
59455
|
}
|
|
59369
59456
|
} else {
|
|
59370
|
-
if (cur_key >=
|
|
59457
|
+
if (cur_key >= bound) {
|
|
59371
59458
|
break;
|
|
59372
59459
|
}
|
|
59373
59460
|
}
|
|
@@ -59589,6 +59676,27 @@ void Leaf::Remove(row_t row_id) {
|
|
|
59589
59676
|
}
|
|
59590
59677
|
}
|
|
59591
59678
|
|
|
59679
|
+
string Leaf::ToString(Node *node) {
|
|
59680
|
+
|
|
59681
|
+
Leaf *leaf = (Leaf *)node;
|
|
59682
|
+
string str = "Leaf: [";
|
|
59683
|
+
for (idx_t i = 0; i < leaf->count; i++) {
|
|
59684
|
+
str += i == 0 ? to_string(leaf->row_ids[i]) : ", " + to_string(leaf->row_ids[i]);
|
|
59685
|
+
}
|
|
59686
|
+
return str + "]";
|
|
59687
|
+
}
|
|
59688
|
+
|
|
59689
|
+
void Leaf::Merge(Node *&l_node, Node *&r_node) {
|
|
59690
|
+
|
|
59691
|
+
Leaf *l_n = (Leaf *)l_node;
|
|
59692
|
+
Leaf *r_n = (Leaf *)r_node;
|
|
59693
|
+
|
|
59694
|
+
// append non-duplicate row_ids to l_n
|
|
59695
|
+
for (idx_t i = 0; i < r_n->count; i++) {
|
|
59696
|
+
l_n->Insert(r_n->GetRowId(i));
|
|
59697
|
+
}
|
|
59698
|
+
}
|
|
59699
|
+
|
|
59592
59700
|
BlockPointer Leaf::Serialize(duckdb::MetaBlockWriter &writer) {
|
|
59593
59701
|
auto ptr = writer.GetBlockPointer();
|
|
59594
59702
|
// Write Node Type
|
|
@@ -59616,21 +59724,6 @@ Leaf *Leaf::Deserialize(MetaBlockReader &reader) {
|
|
|
59616
59724
|
return new Leaf(move(elements), num_elements, prefix);
|
|
59617
59725
|
}
|
|
59618
59726
|
|
|
59619
|
-
void Leaf::Merge(bool &has_constraint, Node *&l_node, Node *&r_node) {
|
|
59620
|
-
|
|
59621
|
-
Leaf *l_n = (Leaf *)l_node;
|
|
59622
|
-
Leaf *r_n = (Leaf *)r_node;
|
|
59623
|
-
|
|
59624
|
-
// append non-duplicate row_ids to l_n
|
|
59625
|
-
for (idx_t i = 0; i < r_n->count; i++) {
|
|
59626
|
-
l_n->Insert(r_n->GetRowId(i));
|
|
59627
|
-
}
|
|
59628
|
-
|
|
59629
|
-
if (has_constraint && l_n->count > 1) {
|
|
59630
|
-
throw ConstraintException("Data contains duplicates on indexed column(s)");
|
|
59631
|
-
}
|
|
59632
|
-
}
|
|
59633
|
-
|
|
59634
59727
|
} // namespace duckdb
|
|
59635
59728
|
|
|
59636
59729
|
|
|
@@ -59782,6 +59875,36 @@ void Node::New(NodeType &type, Node *&node) {
|
|
|
59782
59875
|
}
|
|
59783
59876
|
}
|
|
59784
59877
|
|
|
59878
|
+
string Node::ToString(ART &art) {
|
|
59879
|
+
|
|
59880
|
+
string str = "Node";
|
|
59881
|
+
switch (this->type) {
|
|
59882
|
+
case NodeType::NLeaf:
|
|
59883
|
+
return Leaf::ToString(this);
|
|
59884
|
+
case NodeType::N4:
|
|
59885
|
+
str += to_string(Node4::GetSize());
|
|
59886
|
+
break;
|
|
59887
|
+
case NodeType::N16:
|
|
59888
|
+
str += to_string(Node16::GetSize());
|
|
59889
|
+
break;
|
|
59890
|
+
case NodeType::N48:
|
|
59891
|
+
str += to_string(Node48::GetSize());
|
|
59892
|
+
break;
|
|
59893
|
+
case NodeType::N256:
|
|
59894
|
+
str += to_string(Node256::GetSize());
|
|
59895
|
+
break;
|
|
59896
|
+
}
|
|
59897
|
+
|
|
59898
|
+
str += ": [";
|
|
59899
|
+
auto next_pos = GetNextPos(DConstants::INVALID_INDEX);
|
|
59900
|
+
while (next_pos != DConstants::INVALID_INDEX) {
|
|
59901
|
+
auto child = GetChild(art, next_pos);
|
|
59902
|
+
str += "(" + to_string(next_pos) + ", " + child->ToString(art) + ")";
|
|
59903
|
+
next_pos = GetNextPos(next_pos);
|
|
59904
|
+
}
|
|
59905
|
+
return str + "]";
|
|
59906
|
+
}
|
|
59907
|
+
|
|
59785
59908
|
BlockPointer Node::SerializeInternal(ART &art, duckdb::MetaBlockWriter &writer, InternalType &internal_type) {
|
|
59786
59909
|
// Iterate through children and annotate their offsets
|
|
59787
59910
|
vector<BlockPointer> child_offsets;
|
|
@@ -59879,7 +60002,7 @@ void UpdateParentsOfNodes(Node *&l_node, Node *&r_node, ParentsOfNodes &parents)
|
|
|
59879
60002
|
}
|
|
59880
60003
|
}
|
|
59881
60004
|
|
|
59882
|
-
|
|
60005
|
+
bool Merge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
|
|
59883
60006
|
|
|
59884
60007
|
// always try to merge the smaller node into the bigger node
|
|
59885
60008
|
// because maybe there is enough free space in the bigger node to fit the smaller one
|
|
@@ -59904,13 +60027,16 @@ void Merge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
|
|
|
59904
60027
|
case NodeType::NLeaf:
|
|
59905
60028
|
D_ASSERT(info.l_node->type == NodeType::NLeaf);
|
|
59906
60029
|
D_ASSERT(info.r_node->type == NodeType::NLeaf);
|
|
59907
|
-
|
|
59908
|
-
|
|
60030
|
+
if (info.l_art->IsUnique()) {
|
|
60031
|
+
return false;
|
|
60032
|
+
}
|
|
60033
|
+
Leaf::Merge(info.l_node, info.r_node);
|
|
60034
|
+
return true;
|
|
59909
60035
|
}
|
|
59910
60036
|
throw InternalException("Invalid node type for right node in merge.");
|
|
59911
60037
|
}
|
|
59912
60038
|
|
|
59913
|
-
|
|
60039
|
+
bool ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
|
|
59914
60040
|
|
|
59915
60041
|
auto &l_node = info.l_node;
|
|
59916
60042
|
auto &r_node = info.r_node;
|
|
@@ -59951,7 +60077,7 @@ void ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &paren
|
|
|
59951
60077
|
Node::InsertChild(l_node, mismatch_byte, r_node);
|
|
59952
60078
|
UpdateParentsOfNodes(l_node, null_parent, parents);
|
|
59953
60079
|
r_node = nullptr;
|
|
59954
|
-
return;
|
|
60080
|
+
return true;
|
|
59955
60081
|
}
|
|
59956
60082
|
|
|
59957
60083
|
// recurse
|
|
@@ -59978,9 +60104,10 @@ void ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &paren
|
|
|
59978
60104
|
l_node = new_node;
|
|
59979
60105
|
UpdateParentsOfNodes(l_node, null_parent, parents);
|
|
59980
60106
|
r_node = nullptr;
|
|
60107
|
+
return true;
|
|
59981
60108
|
}
|
|
59982
60109
|
|
|
59983
|
-
|
|
60110
|
+
bool Node::MergeAtByte(MergeInfo &info, idx_t depth, idx_t &l_child_pos, idx_t &r_pos, uint8_t &key_byte,
|
|
59984
60111
|
Node *&l_parent, idx_t l_pos) {
|
|
59985
60112
|
|
|
59986
60113
|
auto r_child = info.r_node->GetChild(*info.r_art, r_pos);
|
|
@@ -59992,22 +60119,22 @@ void Node::MergeAtByte(MergeInfo &info, idx_t depth, idx_t &l_child_pos, idx_t &
|
|
|
59992
60119
|
l_parent->ReplaceChildPointer(l_pos, info.l_node);
|
|
59993
60120
|
}
|
|
59994
60121
|
info.r_node->ReplaceChildPointer(r_pos, nullptr);
|
|
59995
|
-
return;
|
|
60122
|
+
return true;
|
|
59996
60123
|
}
|
|
59997
60124
|
|
|
59998
60125
|
// recurse
|
|
59999
60126
|
auto l_child = info.l_node->GetChild(*info.l_art, l_child_pos);
|
|
60000
60127
|
MergeInfo child_info(info.l_art, info.r_art, l_child, r_child);
|
|
60001
60128
|
ParentsOfNodes child_parents(info.l_node, l_child_pos, info.r_node, r_pos);
|
|
60002
|
-
ResolvePrefixesAndMerge(child_info, depth + 1, child_parents);
|
|
60129
|
+
return ResolvePrefixesAndMerge(child_info, depth + 1, child_parents);
|
|
60003
60130
|
}
|
|
60004
60131
|
|
|
60005
|
-
|
|
60132
|
+
bool Node::MergeARTs(ART *l_art, ART *r_art) {
|
|
60006
60133
|
|
|
60007
60134
|
Node *null_parent = nullptr;
|
|
60008
60135
|
MergeInfo info(l_art, r_art, l_art->tree, r_art->tree);
|
|
60009
60136
|
ParentsOfNodes parents(null_parent, 0, null_parent, 0);
|
|
60010
|
-
ResolvePrefixesAndMerge(info, 0, parents);
|
|
60137
|
+
return ResolvePrefixesAndMerge(info, 0, parents);
|
|
60011
60138
|
}
|
|
60012
60139
|
|
|
60013
60140
|
} // namespace duckdb
|
|
@@ -60137,15 +60264,18 @@ void Node16::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
60137
60264
|
}
|
|
60138
60265
|
}
|
|
60139
60266
|
|
|
60140
|
-
|
|
60267
|
+
bool Node16::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
|
|
60141
60268
|
|
|
60142
60269
|
Node16 *r_n = (Node16 *)info.r_node;
|
|
60143
60270
|
|
|
60144
60271
|
for (idx_t i = 0; i < info.r_node->count; i++) {
|
|
60145
60272
|
|
|
60146
60273
|
auto l_child_pos = info.l_node->GetChildPos(r_n->key[i]);
|
|
60147
|
-
Node::MergeAtByte(info, depth, l_child_pos, i, r_n->key[i], l_parent, l_pos)
|
|
60274
|
+
if (!Node::MergeAtByte(info, depth, l_child_pos, i, r_n->key[i], l_parent, l_pos)) {
|
|
60275
|
+
return false;
|
|
60276
|
+
}
|
|
60148
60277
|
}
|
|
60278
|
+
return true;
|
|
60149
60279
|
}
|
|
60150
60280
|
|
|
60151
60281
|
idx_t Node16::GetSize() {
|
|
@@ -60237,16 +60367,19 @@ void Node256::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
60237
60367
|
}
|
|
60238
60368
|
}
|
|
60239
60369
|
|
|
60240
|
-
|
|
60370
|
+
bool Node256::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
|
|
60241
60371
|
|
|
60242
60372
|
for (idx_t i = 0; i < 256; i++) {
|
|
60243
60373
|
if (info.r_node->GetChildPos(i) != DConstants::INVALID_INDEX) {
|
|
60244
60374
|
|
|
60245
60375
|
auto l_child_pos = info.l_node->GetChildPos(i);
|
|
60246
60376
|
auto key_byte = (uint8_t)i;
|
|
60247
|
-
Node::MergeAtByte(info, depth, l_child_pos, i, key_byte, l_parent, l_pos)
|
|
60377
|
+
if (!Node::MergeAtByte(info, depth, l_child_pos, i, key_byte, l_parent, l_pos)) {
|
|
60378
|
+
return false;
|
|
60379
|
+
}
|
|
60248
60380
|
}
|
|
60249
60381
|
}
|
|
60382
|
+
return true;
|
|
60250
60383
|
}
|
|
60251
60384
|
|
|
60252
60385
|
idx_t Node256::GetSize() {
|
|
@@ -60373,15 +60506,18 @@ void Node4::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
60373
60506
|
}
|
|
60374
60507
|
}
|
|
60375
60508
|
|
|
60376
|
-
|
|
60509
|
+
bool Node4::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
|
|
60377
60510
|
|
|
60378
60511
|
Node4 *r_n = (Node4 *)info.r_node;
|
|
60379
60512
|
|
|
60380
60513
|
for (idx_t i = 0; i < info.r_node->count; i++) {
|
|
60381
60514
|
|
|
60382
60515
|
auto l_child_pos = info.l_node->GetChildPos(r_n->key[i]);
|
|
60383
|
-
Node::MergeAtByte(info, depth, l_child_pos, i, r_n->key[i], l_parent, l_pos)
|
|
60516
|
+
if (!Node::MergeAtByte(info, depth, l_child_pos, i, r_n->key[i], l_parent, l_pos)) {
|
|
60517
|
+
return false;
|
|
60518
|
+
}
|
|
60384
60519
|
}
|
|
60520
|
+
return true;
|
|
60385
60521
|
}
|
|
60386
60522
|
|
|
60387
60523
|
idx_t Node4::GetSize() {
|
|
@@ -60505,7 +60641,7 @@ void Node48::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
60505
60641
|
}
|
|
60506
60642
|
}
|
|
60507
60643
|
|
|
60508
|
-
|
|
60644
|
+
bool Node48::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
|
|
60509
60645
|
|
|
60510
60646
|
Node48 *r_n = (Node48 *)info.r_node;
|
|
60511
60647
|
|
|
@@ -60514,9 +60650,12 @@ void Node48::Merge(MergeInfo &info, idx_t depth, Node *&l_parent, idx_t l_pos) {
|
|
|
60514
60650
|
|
|
60515
60651
|
auto l_child_pos = info.l_node->GetChildPos(i);
|
|
60516
60652
|
auto key_byte = (uint8_t)i;
|
|
60517
|
-
Node::MergeAtByte(info, depth, l_child_pos, i, key_byte, l_parent, l_pos)
|
|
60653
|
+
if (!Node::MergeAtByte(info, depth, l_child_pos, i, key_byte, l_parent, l_pos)) {
|
|
60654
|
+
return false;
|
|
60655
|
+
}
|
|
60518
60656
|
}
|
|
60519
60657
|
}
|
|
60658
|
+
return true;
|
|
60520
60659
|
}
|
|
60521
60660
|
|
|
60522
60661
|
idx_t Node48::GetSize() {
|
|
@@ -72728,7 +72867,8 @@ namespace duckdb {
|
|
|
72728
72867
|
|
|
72729
72868
|
class IndexJoinOperatorState : public OperatorState {
|
|
72730
72869
|
public:
|
|
72731
|
-
IndexJoinOperatorState(Allocator &allocator, const PhysicalIndexJoin &op)
|
|
72870
|
+
IndexJoinOperatorState(Allocator &allocator, const PhysicalIndexJoin &op)
|
|
72871
|
+
: probe_executor(allocator), arena_allocator(allocator), keys(STANDARD_VECTOR_SIZE) {
|
|
72732
72872
|
rhs_rows.resize(STANDARD_VECTOR_SIZE);
|
|
72733
72873
|
result_sizes.resize(STANDARD_VECTOR_SIZE);
|
|
72734
72874
|
|
|
@@ -72750,10 +72890,14 @@ public:
|
|
|
72750
72890
|
DataChunk join_keys;
|
|
72751
72891
|
DataChunk rhs_chunk;
|
|
72752
72892
|
SelectionVector rhs_sel;
|
|
72893
|
+
|
|
72753
72894
|
//! Vector of rows that mush be fetched for every LHS key
|
|
72754
72895
|
vector<vector<row_t>> rhs_rows;
|
|
72755
72896
|
ExpressionExecutor probe_executor;
|
|
72756
72897
|
|
|
72898
|
+
ArenaAllocator arena_allocator;
|
|
72899
|
+
vector<Key> keys;
|
|
72900
|
+
|
|
72757
72901
|
public:
|
|
72758
72902
|
void Finalize(PhysicalOperator *op, ExecutionContext &context) override {
|
|
72759
72903
|
context.thread.profiler.Flush(op, &probe_executor, "probe_executor", 0);
|
|
@@ -72858,22 +73002,25 @@ void PhysicalIndexJoin::Output(ExecutionContext &context, DataChunk &input, Data
|
|
|
72858
73002
|
}
|
|
72859
73003
|
|
|
72860
73004
|
void PhysicalIndexJoin::GetRHSMatches(ExecutionContext &context, DataChunk &input, OperatorState &state_p) const {
|
|
73005
|
+
|
|
72861
73006
|
auto &state = (IndexJoinOperatorState &)state_p;
|
|
72862
73007
|
auto &art = (ART &)*index;
|
|
72863
|
-
|
|
73008
|
+
|
|
73009
|
+
// generate the keys for this chunk
|
|
73010
|
+
state.arena_allocator.Reset();
|
|
73011
|
+
ART::GenerateKeys(state.arena_allocator, state.join_keys, state.keys);
|
|
73012
|
+
|
|
72864
73013
|
for (idx_t i = 0; i < input.size(); i++) {
|
|
72865
|
-
auto equal_value = state.join_keys.GetValue(0, i);
|
|
72866
|
-
auto index_state = art.InitializeScanSinglePredicate(transaction, equal_value, ExpressionType::COMPARE_EQUAL);
|
|
72867
73014
|
state.rhs_rows[i].clear();
|
|
72868
|
-
if (!
|
|
73015
|
+
if (!state.keys[i].Empty()) {
|
|
72869
73016
|
if (fetch_types.empty()) {
|
|
72870
73017
|
IndexLock lock;
|
|
72871
73018
|
index->InitializeLock(lock);
|
|
72872
|
-
art.SearchEqualJoinNoFetch(
|
|
73019
|
+
art.SearchEqualJoinNoFetch(state.keys[i], state.result_sizes[i]);
|
|
72873
73020
|
} else {
|
|
72874
73021
|
IndexLock lock;
|
|
72875
73022
|
index->InitializeLock(lock);
|
|
72876
|
-
art.SearchEqual(
|
|
73023
|
+
art.SearchEqual(state.keys[i], (idx_t)-1, state.rhs_rows[i]);
|
|
72877
73024
|
state.result_sizes[i] = state.rhs_rows[i].size();
|
|
72878
73025
|
}
|
|
72879
73026
|
} else {
|
|
@@ -80005,11 +80152,7 @@ void PhysicalCreateIndex::Combine(ExecutionContext &context, GlobalSinkState &gs
|
|
|
80005
80152
|
}
|
|
80006
80153
|
|
|
80007
80154
|
// merge the local index into the global index
|
|
80008
|
-
|
|
80009
|
-
IndexLock global_lock;
|
|
80010
|
-
gstate.global_index->InitializeLock(global_lock);
|
|
80011
|
-
gstate.global_index->MergeIndexes(lstate.local_index.get());
|
|
80012
|
-
}
|
|
80155
|
+
gstate.global_index->MergeIndexes(lstate.local_index.get());
|
|
80013
80156
|
}
|
|
80014
80157
|
|
|
80015
80158
|
SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
|
@@ -88375,6 +88518,9 @@ template <bool LAST, bool SKIP_NULLS>
|
|
|
88375
88518
|
struct FirstFunctionString : public FirstFunctionBase {
|
|
88376
88519
|
template <class STATE>
|
|
88377
88520
|
static void SetValue(STATE *state, string_t value, bool is_null) {
|
|
88521
|
+
if (LAST && state->is_set) {
|
|
88522
|
+
Destroy(state);
|
|
88523
|
+
}
|
|
88378
88524
|
if (is_null) {
|
|
88379
88525
|
if (!SKIP_NULLS) {
|
|
88380
88526
|
state->is_set = true;
|
|
@@ -193092,6 +193238,24 @@ data_ptr_t ArenaAllocator::Allocate(idx_t len) {
|
|
|
193092
193238
|
return result;
|
|
193093
193239
|
}
|
|
193094
193240
|
|
|
193241
|
+
void ArenaAllocator::Reset() {
|
|
193242
|
+
|
|
193243
|
+
if (head) {
|
|
193244
|
+
// destroy all chunks except the current one
|
|
193245
|
+
if (head->next) {
|
|
193246
|
+
auto current_next = move(head->next);
|
|
193247
|
+
while (current_next) {
|
|
193248
|
+
current_next = move(current_next->next);
|
|
193249
|
+
}
|
|
193250
|
+
}
|
|
193251
|
+
tail = head.get();
|
|
193252
|
+
|
|
193253
|
+
// reset the head
|
|
193254
|
+
head->current_position = 0;
|
|
193255
|
+
head->prev = nullptr;
|
|
193256
|
+
}
|
|
193257
|
+
}
|
|
193258
|
+
|
|
193095
193259
|
void ArenaAllocator::Destroy() {
|
|
193096
193260
|
head = nullptr;
|
|
193097
193261
|
tail = nullptr;
|
|
@@ -201767,15 +201931,15 @@ void Index::Delete(DataChunk &entries, Vector &row_identifiers) {
|
|
|
201767
201931
|
Delete(state, entries, row_identifiers);
|
|
201768
201932
|
}
|
|
201769
201933
|
|
|
201770
|
-
|
|
201934
|
+
bool Index::MergeIndexes(Index *other_index) {
|
|
201935
|
+
|
|
201936
|
+
IndexLock state;
|
|
201937
|
+
InitializeLock(state);
|
|
201771
201938
|
|
|
201772
|
-
// create the global index
|
|
201773
201939
|
switch (this->type) {
|
|
201774
201940
|
case IndexType::ART: {
|
|
201775
|
-
auto
|
|
201776
|
-
|
|
201777
|
-
ART::Merge(this_art, other_art);
|
|
201778
|
-
break;
|
|
201941
|
+
auto art = (ART *)this;
|
|
201942
|
+
return art->MergeIndexes(state, other_index);
|
|
201779
201943
|
}
|
|
201780
201944
|
default:
|
|
201781
201945
|
throw InternalException("Unimplemented index type for merge");
|