duckdb 0.5.2-dev2164.0 → 0.5.2-dev2189.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +460 -160
- package/src/duckdb.hpp +42 -9
- package/src/parquet-amalgamation.cpp +28977 -28977
package/src/duckdb.cpp
CHANGED
|
@@ -1767,15 +1767,21 @@ private:
|
|
|
1767
1767
|
|
|
1768
1768
|
namespace duckdb {
|
|
1769
1769
|
class Prefix {
|
|
1770
|
+
static constexpr idx_t PREFIX_INLINE_BYTES = 8;
|
|
1771
|
+
|
|
1770
1772
|
public:
|
|
1771
1773
|
Prefix();
|
|
1772
1774
|
// Prefix created from key starting on `depth`
|
|
1773
1775
|
Prefix(Key &key, uint32_t depth, uint32_t size);
|
|
1774
1776
|
// Prefix created from other prefix up to size
|
|
1775
1777
|
Prefix(Prefix &other_prefix, uint32_t size);
|
|
1778
|
+
~Prefix();
|
|
1776
1779
|
|
|
1777
1780
|
// Returns the Prefix's size
|
|
1778
1781
|
uint32_t Size() const;
|
|
1782
|
+
//! Return a pointer to the prefix data
|
|
1783
|
+
uint8_t *GetPrefixData();
|
|
1784
|
+
const uint8_t *GetPrefixData() const;
|
|
1779
1785
|
|
|
1780
1786
|
// Subscript operator
|
|
1781
1787
|
uint8_t &operator[](idx_t idx);
|
|
@@ -1803,8 +1809,17 @@ public:
|
|
|
1803
1809
|
uint32_t MismatchPosition(Prefix &other);
|
|
1804
1810
|
|
|
1805
1811
|
private:
|
|
1806
|
-
unique_ptr<uint8_t[]> prefix;
|
|
1807
1812
|
uint32_t size;
|
|
1813
|
+
union {
|
|
1814
|
+
uint8_t *ptr;
|
|
1815
|
+
uint8_t inlined[8];
|
|
1816
|
+
} value;
|
|
1817
|
+
|
|
1818
|
+
private:
|
|
1819
|
+
bool IsInlined() const;
|
|
1820
|
+
uint8_t *AllocatePrefix(uint32_t size);
|
|
1821
|
+
void Overwrite(uint32_t new_size, uint8_t *data);
|
|
1822
|
+
void Destroy();
|
|
1808
1823
|
};
|
|
1809
1824
|
|
|
1810
1825
|
} // namespace duckdb
|
|
@@ -1813,6 +1828,7 @@ private:
|
|
|
1813
1828
|
|
|
1814
1829
|
|
|
1815
1830
|
|
|
1831
|
+
|
|
1816
1832
|
namespace duckdb {
|
|
1817
1833
|
enum class NodeType : uint8_t { NLeaf = 0, N4 = 1, N16 = 2, N48 = 3, N256 = 4 };
|
|
1818
1834
|
class ART;
|
|
@@ -1867,6 +1883,7 @@ public:
|
|
|
1867
1883
|
//! Compressed path (prefix)
|
|
1868
1884
|
Prefix prefix;
|
|
1869
1885
|
|
|
1886
|
+
static void Delete(Node *node);
|
|
1870
1887
|
//! Get the position of a child corresponding exactly to the specific byte, returns DConstants::INVALID_INDEX if not
|
|
1871
1888
|
//! exists
|
|
1872
1889
|
virtual idx_t GetChildPos(uint8_t k) {
|
|
@@ -1928,16 +1945,21 @@ namespace duckdb {
|
|
|
1928
1945
|
class Leaf : public Node {
|
|
1929
1946
|
public:
|
|
1930
1947
|
Leaf(Key &value, uint32_t depth, row_t row_id);
|
|
1931
|
-
Leaf(Key &value, uint32_t depth,
|
|
1932
|
-
Leaf(
|
|
1948
|
+
Leaf(Key &value, uint32_t depth, row_t *row_ids, idx_t num_elements);
|
|
1949
|
+
Leaf(row_t *row_ids, idx_t num_elements, Prefix &prefix);
|
|
1950
|
+
Leaf(row_t row_id, Prefix &prefix);
|
|
1951
|
+
~Leaf();
|
|
1933
1952
|
|
|
1934
|
-
idx_t
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
}
|
|
1953
|
+
row_t GetRowId(idx_t index);
|
|
1954
|
+
idx_t GetCapacity() const;
|
|
1955
|
+
bool IsInlined() const;
|
|
1956
|
+
row_t *GetRowIds();
|
|
1939
1957
|
|
|
1940
1958
|
public:
|
|
1959
|
+
static Leaf *New(Key &value, uint32_t depth, row_t row_id);
|
|
1960
|
+
static Leaf *New(Key &value, uint32_t depth, row_t *row_ids, idx_t num_elements);
|
|
1961
|
+
static Leaf *New(row_t *row_ids, idx_t num_elements, Prefix &prefix);
|
|
1962
|
+
static Leaf *New(row_t row_id, Prefix &prefix);
|
|
1941
1963
|
//! Insert a row_id into a leaf
|
|
1942
1964
|
void Insert(row_t row_id);
|
|
1943
1965
|
//! Remove a row_id from a leaf
|
|
@@ -1954,7 +1976,13 @@ public:
|
|
|
1954
1976
|
static Leaf *Deserialize(duckdb::MetaBlockReader &reader);
|
|
1955
1977
|
|
|
1956
1978
|
private:
|
|
1957
|
-
|
|
1979
|
+
union {
|
|
1980
|
+
row_t inlined;
|
|
1981
|
+
row_t *ptr;
|
|
1982
|
+
} rowids;
|
|
1983
|
+
|
|
1984
|
+
private:
|
|
1985
|
+
row_t *Resize(row_t *current_row_ids, uint32_t current_count, idx_t new_capacity);
|
|
1958
1986
|
};
|
|
1959
1987
|
|
|
1960
1988
|
} // namespace duckdb
|
|
@@ -2043,6 +2071,7 @@ private:
|
|
|
2043
2071
|
|
|
2044
2072
|
|
|
2045
2073
|
|
|
2074
|
+
|
|
2046
2075
|
namespace duckdb {
|
|
2047
2076
|
|
|
2048
2077
|
class ART;
|
|
@@ -2094,6 +2123,7 @@ public:
|
|
|
2094
2123
|
ARTPointer children[16];
|
|
2095
2124
|
|
|
2096
2125
|
public:
|
|
2126
|
+
static Node16 *New();
|
|
2097
2127
|
//! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
|
|
2098
2128
|
idx_t GetChildPos(uint8_t k) override;
|
|
2099
2129
|
//! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
|
|
@@ -2139,6 +2169,7 @@ public:
|
|
|
2139
2169
|
ARTPointer children[256];
|
|
2140
2170
|
|
|
2141
2171
|
public:
|
|
2172
|
+
static Node256 *New();
|
|
2142
2173
|
//! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
|
|
2143
2174
|
idx_t GetChildPos(uint8_t k) override;
|
|
2144
2175
|
//! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
|
|
@@ -2181,11 +2212,13 @@ namespace duckdb {
|
|
|
2181
2212
|
class Node4 : public Node {
|
|
2182
2213
|
public:
|
|
2183
2214
|
Node4();
|
|
2215
|
+
|
|
2184
2216
|
uint8_t key[4];
|
|
2185
2217
|
// Pointers to the child nodes
|
|
2186
2218
|
ARTPointer children[4];
|
|
2187
2219
|
|
|
2188
2220
|
public:
|
|
2221
|
+
static Node4 *New();
|
|
2189
2222
|
//! Get position of a byte, returns DConstants::INVALID_INDEX if not exists
|
|
2190
2223
|
idx_t GetChildPos(uint8_t k) override;
|
|
2191
2224
|
//! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
|
|
@@ -2232,6 +2265,7 @@ public:
|
|
|
2232
2265
|
ARTPointer children[48];
|
|
2233
2266
|
|
|
2234
2267
|
public:
|
|
2268
|
+
static Node48 *New();
|
|
2235
2269
|
//! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
|
|
2236
2270
|
idx_t GetChildPos(uint8_t k) override;
|
|
2237
2271
|
//! Get the position of the first child that is greater or equal to the specific byte, or DConstants::INVALID_INDEX
|
|
@@ -2362,6 +2396,12 @@ private:
|
|
|
2362
2396
|
bool right_inclusive, idx_t max_count, vector<row_t> &result_ids);
|
|
2363
2397
|
|
|
2364
2398
|
void VerifyExistence(DataChunk &chunk, VerifyExistenceType verify_type, string *err_msg_ptr = nullptr);
|
|
2399
|
+
|
|
2400
|
+
private:
|
|
2401
|
+
//! The estimated ART memory consumption
|
|
2402
|
+
idx_t estimated_art_size;
|
|
2403
|
+
//! The estimated memory consumption of a single key
|
|
2404
|
+
idx_t estimated_key_size;
|
|
2365
2405
|
};
|
|
2366
2406
|
|
|
2367
2407
|
} // namespace duckdb
|
|
@@ -6543,6 +6583,7 @@ Allocator::~Allocator() {
|
|
|
6543
6583
|
}
|
|
6544
6584
|
|
|
6545
6585
|
data_ptr_t Allocator::AllocateData(idx_t size) {
|
|
6586
|
+
D_ASSERT(size > 0);
|
|
6546
6587
|
auto result = allocate_function(private_data.get(), size);
|
|
6547
6588
|
#ifdef DEBUG
|
|
6548
6589
|
D_ASSERT(private_data);
|
|
@@ -6555,6 +6596,7 @@ void Allocator::FreeData(data_ptr_t pointer, idx_t size) {
|
|
|
6555
6596
|
if (!pointer) {
|
|
6556
6597
|
return;
|
|
6557
6598
|
}
|
|
6599
|
+
D_ASSERT(size > 0);
|
|
6558
6600
|
#ifdef DEBUG
|
|
6559
6601
|
D_ASSERT(private_data);
|
|
6560
6602
|
private_data->debug_info->FreeData(pointer, size);
|
|
@@ -6574,11 +6616,15 @@ data_ptr_t Allocator::ReallocateData(data_ptr_t pointer, idx_t old_size, idx_t s
|
|
|
6574
6616
|
return new_pointer;
|
|
6575
6617
|
}
|
|
6576
6618
|
|
|
6577
|
-
Allocator &Allocator::
|
|
6578
|
-
static Allocator DEFAULT_ALLOCATOR;
|
|
6619
|
+
shared_ptr<Allocator> &Allocator::DefaultAllocatorReference() {
|
|
6620
|
+
static shared_ptr<Allocator> DEFAULT_ALLOCATOR = make_shared<Allocator>();
|
|
6579
6621
|
return DEFAULT_ALLOCATOR;
|
|
6580
6622
|
}
|
|
6581
6623
|
|
|
6624
|
+
Allocator &Allocator::DefaultAllocator() {
|
|
6625
|
+
return *DefaultAllocatorReference();
|
|
6626
|
+
}
|
|
6627
|
+
|
|
6582
6628
|
//===--------------------------------------------------------------------===//
|
|
6583
6629
|
// Debug Info (extended)
|
|
6584
6630
|
//===--------------------------------------------------------------------===//
|
|
@@ -60543,7 +60589,8 @@ namespace duckdb {
|
|
|
60543
60589
|
ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
60544
60590
|
const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type,
|
|
60545
60591
|
DatabaseInstance &db, idx_t block_id, idx_t block_offset)
|
|
60546
|
-
: Index(IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type), db(db)
|
|
60592
|
+
: Index(IndexType::ART, table_io_manager, column_ids, unbound_expressions, constraint_type), db(db),
|
|
60593
|
+
estimated_art_size(0), estimated_key_size(16) {
|
|
60547
60594
|
if (block_id != DConstants::INVALID_INDEX) {
|
|
60548
60595
|
tree = Node::Deserialize(*this, block_id, block_offset);
|
|
60549
60596
|
} else {
|
|
@@ -60554,17 +60601,28 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
|
60554
60601
|
switch (types[i]) {
|
|
60555
60602
|
case PhysicalType::BOOL:
|
|
60556
60603
|
case PhysicalType::INT8:
|
|
60557
|
-
case PhysicalType::INT16:
|
|
60558
|
-
case PhysicalType::INT32:
|
|
60559
|
-
case PhysicalType::INT64:
|
|
60560
|
-
case PhysicalType::INT128:
|
|
60561
60604
|
case PhysicalType::UINT8:
|
|
60605
|
+
estimated_key_size += sizeof(int8_t);
|
|
60606
|
+
break;
|
|
60607
|
+
case PhysicalType::INT16:
|
|
60562
60608
|
case PhysicalType::UINT16:
|
|
60609
|
+
estimated_key_size += sizeof(int16_t);
|
|
60610
|
+
break;
|
|
60611
|
+
case PhysicalType::INT32:
|
|
60563
60612
|
case PhysicalType::UINT32:
|
|
60564
|
-
case PhysicalType::UINT64:
|
|
60565
60613
|
case PhysicalType::FLOAT:
|
|
60614
|
+
estimated_key_size += sizeof(int32_t);
|
|
60615
|
+
break;
|
|
60616
|
+
case PhysicalType::INT64:
|
|
60617
|
+
case PhysicalType::UINT64:
|
|
60566
60618
|
case PhysicalType::DOUBLE:
|
|
60619
|
+
estimated_key_size += sizeof(int64_t);
|
|
60620
|
+
break;
|
|
60621
|
+
case PhysicalType::INT128:
|
|
60622
|
+
estimated_key_size += sizeof(hugeint_t);
|
|
60623
|
+
break;
|
|
60567
60624
|
case PhysicalType::VARCHAR:
|
|
60625
|
+
estimated_key_size += 16; // oh well
|
|
60568
60626
|
break;
|
|
60569
60627
|
default:
|
|
60570
60628
|
throw InvalidTypeException(logical_types[i], "Invalid type for index");
|
|
@@ -60573,8 +60631,12 @@ ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
|
|
60573
60631
|
}
|
|
60574
60632
|
|
|
60575
60633
|
ART::~ART() {
|
|
60634
|
+
if (estimated_art_size > 0) {
|
|
60635
|
+
BufferManager::GetBufferManager(db).FreeReservedMemory(estimated_art_size);
|
|
60636
|
+
estimated_art_size = 0;
|
|
60637
|
+
}
|
|
60576
60638
|
if (tree) {
|
|
60577
|
-
|
|
60639
|
+
Node::Delete(tree);
|
|
60578
60640
|
tree = nullptr;
|
|
60579
60641
|
}
|
|
60580
60642
|
}
|
|
@@ -60777,7 +60839,6 @@ void Construct(vector<Key> &keys, row_t *row_ids, Node *&node, KeySection &key_s
|
|
|
60777
60839
|
|
|
60778
60840
|
// we reached a leaf, i.e. all the bytes of start_key and end_key match
|
|
60779
60841
|
if (start_key.len == key_section.depth) {
|
|
60780
|
-
|
|
60781
60842
|
// end_idx is inclusive
|
|
60782
60843
|
auto num_row_ids = key_section.end - key_section.start + 1;
|
|
60783
60844
|
|
|
@@ -60786,14 +60847,7 @@ void Construct(vector<Key> &keys, row_t *row_ids, Node *&node, KeySection &key_s
|
|
|
60786
60847
|
throw ConstraintException("New data contains duplicates on indexed column(s)");
|
|
60787
60848
|
}
|
|
60788
60849
|
|
|
60789
|
-
|
|
60790
|
-
auto new_row_ids = unique_ptr<row_t[]>(new row_t[num_row_ids]);
|
|
60791
|
-
for (idx_t i = 0; i < num_row_ids; i++) {
|
|
60792
|
-
new_row_ids[i] = row_ids[key_section.start + i];
|
|
60793
|
-
}
|
|
60794
|
-
|
|
60795
|
-
node = new Leaf(start_key, prefix_start, move(new_row_ids), num_row_ids);
|
|
60796
|
-
|
|
60850
|
+
node = Leaf::New(start_key, prefix_start, row_ids + key_section.start, num_row_ids);
|
|
60797
60851
|
} else { // create a new node and recurse
|
|
60798
60852
|
|
|
60799
60853
|
// we will find at least two child entries of this node, otherwise we'd have reached a leaf
|
|
@@ -60881,6 +60935,10 @@ bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
|
|
|
60881
60935
|
vector<Key> keys(input.size());
|
|
60882
60936
|
GenerateKeys(arena_allocator, input, keys);
|
|
60883
60937
|
|
|
60938
|
+
idx_t extra_memory = estimated_key_size * input.size();
|
|
60939
|
+
BufferManager::GetBufferManager(db).ReserveMemory(extra_memory);
|
|
60940
|
+
estimated_art_size += extra_memory;
|
|
60941
|
+
|
|
60884
60942
|
// now insert the elements into the index
|
|
60885
60943
|
row_ids.Flatten(input.size());
|
|
60886
60944
|
auto row_identifiers = FlatVector::GetData<row_t>(row_ids);
|
|
@@ -60952,7 +61010,7 @@ bool ART::Insert(Node *&node, Key &key, idx_t depth, row_t row_id) {
|
|
|
60952
61010
|
|
|
60953
61011
|
if (!node) {
|
|
60954
61012
|
// node is currently empty, create a leaf here with the key
|
|
60955
|
-
node =
|
|
61013
|
+
node = Leaf::New(key, depth, row_id);
|
|
60956
61014
|
return true;
|
|
60957
61015
|
}
|
|
60958
61016
|
|
|
@@ -60975,11 +61033,11 @@ bool ART::Insert(Node *&node, Key &key, idx_t depth, row_t row_id) {
|
|
|
60975
61033
|
}
|
|
60976
61034
|
}
|
|
60977
61035
|
|
|
60978
|
-
Node *new_node =
|
|
61036
|
+
Node *new_node = Node4::New();
|
|
60979
61037
|
new_node->prefix = Prefix(key, depth, new_prefix_length);
|
|
60980
61038
|
auto key_byte = node->prefix.Reduce(new_prefix_length);
|
|
60981
61039
|
Node4::InsertChild(new_node, key_byte, node);
|
|
60982
|
-
Node *leaf_node =
|
|
61040
|
+
Node *leaf_node = Leaf::New(key, depth + new_prefix_length + 1, row_id);
|
|
60983
61041
|
Node4::InsertChild(new_node, key[depth + new_prefix_length], leaf_node);
|
|
60984
61042
|
node = new_node;
|
|
60985
61043
|
return true;
|
|
@@ -60990,13 +61048,13 @@ bool ART::Insert(Node *&node, Key &key, idx_t depth, row_t row_id) {
|
|
|
60990
61048
|
uint32_t mismatch_pos = node->prefix.KeyMismatchPosition(key, depth);
|
|
60991
61049
|
if (mismatch_pos != node->prefix.Size()) {
|
|
60992
61050
|
// Prefix differs, create new node
|
|
60993
|
-
Node *new_node =
|
|
61051
|
+
Node *new_node = Node4::New();
|
|
60994
61052
|
new_node->prefix = Prefix(key, depth, mismatch_pos);
|
|
60995
61053
|
// Break up prefix
|
|
60996
61054
|
auto key_byte = node->prefix.Reduce(mismatch_pos);
|
|
60997
61055
|
Node4::InsertChild(new_node, key_byte, node);
|
|
60998
61056
|
|
|
60999
|
-
Node *leaf_node =
|
|
61057
|
+
Node *leaf_node = Leaf::New(key, depth + mismatch_pos + 1, row_id);
|
|
61000
61058
|
Node4::InsertChild(new_node, key[depth + mismatch_pos], leaf_node);
|
|
61001
61059
|
node = new_node;
|
|
61002
61060
|
return true;
|
|
@@ -61013,7 +61071,7 @@ bool ART::Insert(Node *&node, Key &key, idx_t depth, row_t row_id) {
|
|
|
61013
61071
|
node->ReplaceChildPointer(pos, child);
|
|
61014
61072
|
return insertion_result;
|
|
61015
61073
|
}
|
|
61016
|
-
Node *new_node =
|
|
61074
|
+
Node *new_node = Leaf::New(key, depth + 1, row_id);
|
|
61017
61075
|
Node::InsertChild(node, key[depth], new_node);
|
|
61018
61076
|
return true;
|
|
61019
61077
|
}
|
|
@@ -61028,6 +61086,10 @@ void ART::Delete(IndexLock &state, DataChunk &input, Vector &row_ids) {
|
|
|
61028
61086
|
// first resolve the expressions
|
|
61029
61087
|
ExecuteExpressions(input, expression);
|
|
61030
61088
|
|
|
61089
|
+
idx_t released_memory = MinValue<idx_t>(estimated_art_size, estimated_key_size * input.size());
|
|
61090
|
+
BufferManager::GetBufferManager(db).FreeReservedMemory(released_memory);
|
|
61091
|
+
estimated_art_size -= released_memory;
|
|
61092
|
+
|
|
61031
61093
|
// then generate the keys for the given input
|
|
61032
61094
|
ArenaAllocator arena_allocator(Allocator::DefaultAllocator());
|
|
61033
61095
|
vector<Key> keys(expression.size());
|
|
@@ -61064,7 +61126,7 @@ void ART::Erase(Node *&node, Key &key, idx_t depth, row_t row_id) {
|
|
|
61064
61126
|
auto leaf = static_cast<Leaf *>(node);
|
|
61065
61127
|
leaf->Remove(row_id);
|
|
61066
61128
|
if (leaf->count == 0) {
|
|
61067
|
-
|
|
61129
|
+
Node::Delete(node);
|
|
61068
61130
|
node = nullptr;
|
|
61069
61131
|
}
|
|
61070
61132
|
|
|
@@ -61416,9 +61478,9 @@ BlockPointer ART::Serialize(duckdb::MetaBlockWriter &writer) {
|
|
|
61416
61478
|
// Merge ARTs
|
|
61417
61479
|
//===--------------------------------------------------------------------===//
|
|
61418
61480
|
bool ART::MergeIndexes(IndexLock &state, Index *other_index) {
|
|
61419
|
-
|
|
61420
61481
|
auto other_art = (ART *)other_index;
|
|
61421
|
-
|
|
61482
|
+
estimated_art_size += other_art->estimated_art_size;
|
|
61483
|
+
other_art->estimated_art_size = 0;
|
|
61422
61484
|
if (!this->tree) {
|
|
61423
61485
|
this->tree = other_art->tree;
|
|
61424
61486
|
other_art->tree = nullptr;
|
|
@@ -61831,45 +61893,104 @@ bool Iterator::LowerBound(Node *node, Key &key, bool inclusive) {
|
|
|
61831
61893
|
#include <cstring>
|
|
61832
61894
|
|
|
61833
61895
|
namespace duckdb {
|
|
61896
|
+
idx_t Leaf::GetCapacity() const {
|
|
61897
|
+
return IsInlined() ? 1 : rowids.ptr[0];
|
|
61898
|
+
}
|
|
61899
|
+
|
|
61900
|
+
bool Leaf::IsInlined() const {
|
|
61901
|
+
return count <= 1;
|
|
61902
|
+
}
|
|
61903
|
+
|
|
61904
|
+
row_t Leaf::GetRowId(idx_t index) {
|
|
61905
|
+
D_ASSERT(index < count);
|
|
61906
|
+
if (IsInlined()) {
|
|
61907
|
+
return rowids.inlined;
|
|
61908
|
+
} else {
|
|
61909
|
+
D_ASSERT(rowids.ptr[0] >= count);
|
|
61910
|
+
return rowids.ptr[index + 1];
|
|
61911
|
+
}
|
|
61912
|
+
}
|
|
61913
|
+
|
|
61914
|
+
row_t *Leaf::GetRowIds() {
|
|
61915
|
+
if (IsInlined()) {
|
|
61916
|
+
return &rowids.inlined;
|
|
61917
|
+
} else {
|
|
61918
|
+
return rowids.ptr + 1;
|
|
61919
|
+
}
|
|
61920
|
+
}
|
|
61834
61921
|
|
|
61835
61922
|
Leaf::Leaf(Key &value, uint32_t depth, row_t row_id) : Node(NodeType::NLeaf) {
|
|
61836
|
-
capacity = 1;
|
|
61837
|
-
row_ids = unique_ptr<row_t[]>(new row_t[capacity]);
|
|
61838
|
-
row_ids[0] = row_id;
|
|
61839
61923
|
count = 1;
|
|
61924
|
+
rowids.inlined = row_id;
|
|
61840
61925
|
D_ASSERT(value.len >= depth);
|
|
61841
61926
|
prefix = Prefix(value, depth, value.len - depth);
|
|
61842
61927
|
}
|
|
61843
61928
|
|
|
61844
|
-
Leaf::Leaf(Key &value, uint32_t depth,
|
|
61845
|
-
|
|
61846
|
-
|
|
61929
|
+
Leaf::Leaf(Key &value, uint32_t depth, row_t *row_ids_p, idx_t num_elements_p) : Node(NodeType::NLeaf) {
|
|
61930
|
+
D_ASSERT(num_elements_p >= 1);
|
|
61931
|
+
if (num_elements_p == 1) {
|
|
61932
|
+
// we can inline the row ids
|
|
61933
|
+
rowids.inlined = row_ids_p[0];
|
|
61934
|
+
} else {
|
|
61935
|
+
// new row ids of this leaf
|
|
61936
|
+
count = 0;
|
|
61937
|
+
Resize(row_ids_p, num_elements_p, num_elements_p);
|
|
61938
|
+
}
|
|
61847
61939
|
count = num_elements_p;
|
|
61848
61940
|
D_ASSERT(value.len >= depth);
|
|
61849
61941
|
prefix = Prefix(value, depth, value.len - depth);
|
|
61850
61942
|
}
|
|
61851
61943
|
|
|
61852
|
-
Leaf::Leaf(
|
|
61853
|
-
|
|
61854
|
-
|
|
61944
|
+
Leaf::Leaf(row_t *row_ids_p, idx_t num_elements_p, Prefix &prefix_p) : Node(NodeType::NLeaf) {
|
|
61945
|
+
D_ASSERT(num_elements_p > 1);
|
|
61946
|
+
D_ASSERT(row_ids_p[0] == row_t(num_elements_p)); // first element should contain capacity
|
|
61947
|
+
rowids.ptr = row_ids_p;
|
|
61855
61948
|
count = num_elements_p;
|
|
61856
61949
|
prefix = prefix_p;
|
|
61857
61950
|
}
|
|
61858
61951
|
|
|
61859
|
-
|
|
61952
|
+
Leaf::Leaf(row_t row_id, Prefix &prefix_p) : Node(NodeType::NLeaf) {
|
|
61953
|
+
rowids.inlined = row_id;
|
|
61954
|
+
count = 1;
|
|
61955
|
+
prefix = prefix_p;
|
|
61956
|
+
}
|
|
61860
61957
|
|
|
61861
|
-
|
|
61958
|
+
Leaf::~Leaf() {
|
|
61959
|
+
if (!IsInlined()) {
|
|
61960
|
+
DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
|
|
61961
|
+
count = 0;
|
|
61962
|
+
}
|
|
61963
|
+
}
|
|
61964
|
+
|
|
61965
|
+
row_t *Leaf::Resize(row_t *current_row_ids, uint32_t current_count, idx_t new_capacity) {
|
|
61966
|
+
D_ASSERT(new_capacity >= current_count);
|
|
61967
|
+
auto new_allocation = AllocateArray<row_t>(new_capacity + 1);
|
|
61968
|
+
new_allocation[0] = new_capacity;
|
|
61969
|
+
auto new_row_ids = new_allocation + 1;
|
|
61970
|
+
memcpy(new_row_ids, current_row_ids, current_count * sizeof(row_t));
|
|
61971
|
+
if (!IsInlined()) {
|
|
61972
|
+
// delete the old data
|
|
61973
|
+
DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
|
|
61974
|
+
}
|
|
61975
|
+
// set up the new pointers
|
|
61976
|
+
rowids.ptr = new_allocation;
|
|
61977
|
+
return new_row_ids;
|
|
61978
|
+
}
|
|
61979
|
+
|
|
61980
|
+
void Leaf::Insert(row_t row_id) {
|
|
61981
|
+
auto capacity = GetCapacity();
|
|
61982
|
+
row_t *row_ids = GetRowIds();
|
|
61983
|
+
D_ASSERT(count <= capacity);
|
|
61862
61984
|
if (count == capacity) {
|
|
61863
|
-
|
|
61864
|
-
|
|
61865
|
-
capacity *= 2;
|
|
61866
|
-
row_ids = move(new_row_id);
|
|
61985
|
+
// Grow array
|
|
61986
|
+
row_ids = Resize(row_ids, count, capacity * 2);
|
|
61867
61987
|
}
|
|
61868
61988
|
row_ids[count++] = row_id;
|
|
61869
61989
|
}
|
|
61870
61990
|
|
|
61871
61991
|
void Leaf::Remove(row_t row_id) {
|
|
61872
61992
|
idx_t entry_offset = DConstants::INVALID_INDEX;
|
|
61993
|
+
row_t *row_ids = GetRowIds();
|
|
61873
61994
|
for (idx_t i = 0; i < count; i++) {
|
|
61874
61995
|
if (row_ids[i] == row_id) {
|
|
61875
61996
|
entry_offset = i;
|
|
@@ -61879,33 +62000,48 @@ void Leaf::Remove(row_t row_id) {
|
|
|
61879
62000
|
if (entry_offset == DConstants::INVALID_INDEX) {
|
|
61880
62001
|
return;
|
|
61881
62002
|
}
|
|
62003
|
+
if (IsInlined()) {
|
|
62004
|
+
D_ASSERT(count == 1);
|
|
62005
|
+
count--;
|
|
62006
|
+
return;
|
|
62007
|
+
}
|
|
61882
62008
|
count--;
|
|
62009
|
+
if (count == 1) {
|
|
62010
|
+
// after erasing we can now inline the leaf
|
|
62011
|
+
// delete the pointer and inline the remaining rowid
|
|
62012
|
+
auto remaining_row_id = row_ids[0] == row_id ? row_ids[1] : row_ids[0];
|
|
62013
|
+
DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
|
|
62014
|
+
rowids.inlined = remaining_row_id;
|
|
62015
|
+
return;
|
|
62016
|
+
}
|
|
62017
|
+
auto capacity = GetCapacity();
|
|
61883
62018
|
if (capacity > 2 && count < capacity / 2) {
|
|
61884
62019
|
// Shrink array, if less than half full
|
|
61885
|
-
auto
|
|
61886
|
-
|
|
61887
|
-
|
|
61888
|
-
|
|
61889
|
-
|
|
61890
|
-
row_ids
|
|
62020
|
+
auto new_capacity = capacity / 2;
|
|
62021
|
+
auto new_allocation = AllocateArray<row_t>(new_capacity + 1);
|
|
62022
|
+
new_allocation[0] = new_capacity;
|
|
62023
|
+
auto new_row_ids = new_allocation + 1;
|
|
62024
|
+
memcpy(new_row_ids, row_ids, entry_offset * sizeof(row_t));
|
|
62025
|
+
memcpy(new_row_ids + entry_offset, row_ids + entry_offset + 1, (count - entry_offset) * sizeof(row_t));
|
|
62026
|
+
DeleteArray<row_t>(rowids.ptr, rowids.ptr[0] + 1);
|
|
62027
|
+
rowids.ptr = new_allocation;
|
|
61891
62028
|
} else {
|
|
61892
62029
|
// Copy the rest
|
|
61893
|
-
memmove(row_ids
|
|
62030
|
+
memmove(row_ids + entry_offset, row_ids + entry_offset + 1, (count - entry_offset) * sizeof(row_t));
|
|
61894
62031
|
}
|
|
61895
62032
|
}
|
|
61896
62033
|
|
|
61897
62034
|
string Leaf::ToString(Node *node) {
|
|
61898
|
-
|
|
61899
62035
|
Leaf *leaf = (Leaf *)node;
|
|
61900
62036
|
string str = "Leaf: [";
|
|
62037
|
+
auto row_ids = leaf->GetRowIds();
|
|
61901
62038
|
for (idx_t i = 0; i < leaf->count; i++) {
|
|
61902
|
-
str += i == 0 ? to_string(
|
|
62039
|
+
str += i == 0 ? to_string(row_ids[i]) : ", " + to_string(row_ids[i]);
|
|
61903
62040
|
}
|
|
61904
62041
|
return str + "]";
|
|
61905
62042
|
}
|
|
61906
62043
|
|
|
61907
62044
|
void Leaf::Merge(Node *&l_node, Node *&r_node) {
|
|
61908
|
-
|
|
61909
62045
|
Leaf *l_n = (Leaf *)l_node;
|
|
61910
62046
|
Leaf *r_n = (Leaf *)r_node;
|
|
61911
62047
|
|
|
@@ -61923,8 +62059,9 @@ BlockPointer Leaf::Serialize(duckdb::MetaBlockWriter &writer) {
|
|
|
61923
62059
|
prefix.Serialize(writer);
|
|
61924
62060
|
// Write Row Ids
|
|
61925
62061
|
// Length
|
|
61926
|
-
writer.Write(count);
|
|
62062
|
+
writer.Write<uint16_t>(count);
|
|
61927
62063
|
// Actual Row Ids
|
|
62064
|
+
auto row_ids = GetRowIds();
|
|
61928
62065
|
for (idx_t i = 0; i < count; i++) {
|
|
61929
62066
|
writer.Write(row_ids[i]);
|
|
61930
62067
|
}
|
|
@@ -61935,11 +62072,19 @@ Leaf *Leaf::Deserialize(MetaBlockReader &reader) {
|
|
|
61935
62072
|
Prefix prefix;
|
|
61936
62073
|
prefix.Deserialize(reader);
|
|
61937
62074
|
auto num_elements = reader.Read<uint16_t>();
|
|
61938
|
-
|
|
61939
|
-
|
|
61940
|
-
|
|
62075
|
+
if (num_elements == 1) {
|
|
62076
|
+
// inlined
|
|
62077
|
+
auto element = reader.Read<row_t>();
|
|
62078
|
+
return Leaf::New(element, prefix);
|
|
62079
|
+
} else {
|
|
62080
|
+
// non-inlined
|
|
62081
|
+
auto elements = AllocateArray<row_t>(num_elements + 1);
|
|
62082
|
+
elements[0] = num_elements;
|
|
62083
|
+
for (idx_t i = 0; i < num_elements; i++) {
|
|
62084
|
+
elements[i + 1] = reader.Read<row_t>();
|
|
62085
|
+
}
|
|
62086
|
+
return Leaf::New(elements, num_elements, prefix);
|
|
61941
62087
|
}
|
|
61942
|
-
return new Leaf(move(elements), num_elements, prefix);
|
|
61943
62088
|
}
|
|
61944
62089
|
|
|
61945
62090
|
} // namespace duckdb
|
|
@@ -62074,25 +62219,78 @@ NodeType Node::GetTypeBySize(idx_t size) {
|
|
|
62074
62219
|
}
|
|
62075
62220
|
|
|
62076
62221
|
void Node::New(NodeType &type, Node *&node) {
|
|
62077
|
-
|
|
62078
62222
|
switch (type) {
|
|
62079
62223
|
case NodeType::N4:
|
|
62080
|
-
node =
|
|
62224
|
+
node = (Node *)Node4::New();
|
|
62081
62225
|
return;
|
|
62082
62226
|
case NodeType::N16:
|
|
62083
|
-
node =
|
|
62227
|
+
node = (Node *)Node16::New();
|
|
62084
62228
|
return;
|
|
62085
62229
|
case NodeType::N48:
|
|
62086
|
-
node =
|
|
62230
|
+
node = (Node *)Node48::New();
|
|
62087
62231
|
return;
|
|
62088
62232
|
case NodeType::N256:
|
|
62089
|
-
node =
|
|
62233
|
+
node = (Node *)Node256::New();
|
|
62090
62234
|
return;
|
|
62091
62235
|
default:
|
|
62092
62236
|
throw InternalException("Unrecognized type for new node creation!");
|
|
62093
62237
|
}
|
|
62094
62238
|
}
|
|
62095
62239
|
|
|
62240
|
+
Node4 *Node4::New() {
|
|
62241
|
+
return AllocateObject<Node4>();
|
|
62242
|
+
}
|
|
62243
|
+
|
|
62244
|
+
Node16 *Node16::New() {
|
|
62245
|
+
return AllocateObject<Node16>();
|
|
62246
|
+
}
|
|
62247
|
+
|
|
62248
|
+
Node48 *Node48::New() {
|
|
62249
|
+
return AllocateObject<Node48>();
|
|
62250
|
+
}
|
|
62251
|
+
|
|
62252
|
+
Node256 *Node256::New() {
|
|
62253
|
+
return AllocateObject<Node256>();
|
|
62254
|
+
}
|
|
62255
|
+
|
|
62256
|
+
Leaf *Leaf::New(Key &value, uint32_t depth, row_t row_id) {
|
|
62257
|
+
return AllocateObject<Leaf>(value, depth, row_id);
|
|
62258
|
+
}
|
|
62259
|
+
|
|
62260
|
+
Leaf *Leaf::New(Key &value, uint32_t depth, row_t *row_ids, idx_t num_elements) {
|
|
62261
|
+
return AllocateObject<Leaf>(value, depth, row_ids, num_elements);
|
|
62262
|
+
}
|
|
62263
|
+
|
|
62264
|
+
Leaf *Leaf::New(row_t *row_ids, idx_t num_elements, Prefix &prefix) {
|
|
62265
|
+
return AllocateObject<Leaf>(row_ids, num_elements, prefix);
|
|
62266
|
+
}
|
|
62267
|
+
|
|
62268
|
+
Leaf *Leaf::New(row_t row_id, Prefix &prefix) {
|
|
62269
|
+
return AllocateObject<Leaf>(row_id, prefix);
|
|
62270
|
+
}
|
|
62271
|
+
|
|
62272
|
+
void Node::Delete(Node *ptr) {
|
|
62273
|
+
switch (ptr->type) {
|
|
62274
|
+
case NodeType::NLeaf:
|
|
62275
|
+
DestroyObject((Leaf *)ptr);
|
|
62276
|
+
break;
|
|
62277
|
+
case NodeType::N4:
|
|
62278
|
+
DestroyObject((Node4 *)ptr);
|
|
62279
|
+
break;
|
|
62280
|
+
case NodeType::N16:
|
|
62281
|
+
DestroyObject((Node16 *)ptr);
|
|
62282
|
+
break;
|
|
62283
|
+
case NodeType::N48:
|
|
62284
|
+
DestroyObject((Node48 *)ptr);
|
|
62285
|
+
break;
|
|
62286
|
+
case NodeType::N256:
|
|
62287
|
+
DestroyObject((Node256 *)ptr);
|
|
62288
|
+
break;
|
|
62289
|
+
default:
|
|
62290
|
+
throw InternalException("eek");
|
|
62291
|
+
}
|
|
62292
|
+
}
|
|
62293
|
+
|
|
62096
62294
|
string Node::ToString(ART &art) {
|
|
62097
62295
|
|
|
62098
62296
|
string str = "Node";
|
|
@@ -62133,7 +62331,7 @@ BlockPointer Node::SerializeInternal(ART &art, duckdb::MetaBlockWriter &writer,
|
|
|
62133
62331
|
// Write Node Type
|
|
62134
62332
|
writer.Write(type);
|
|
62135
62333
|
// Write count
|
|
62136
|
-
writer.Write(count);
|
|
62334
|
+
writer.Write<uint16_t>(count);
|
|
62137
62335
|
// Write Prefix
|
|
62138
62336
|
prefix.Serialize(writer);
|
|
62139
62337
|
// Write Key values
|
|
@@ -62190,19 +62388,19 @@ Node *Node::Deserialize(ART &art, idx_t block_id, idx_t offset) {
|
|
|
62190
62388
|
case NodeType::NLeaf:
|
|
62191
62389
|
return Leaf::Deserialize(reader);
|
|
62192
62390
|
case NodeType::N4: {
|
|
62193
|
-
deserialized_node = (Node *)
|
|
62391
|
+
deserialized_node = (Node *)Node4::New();
|
|
62194
62392
|
break;
|
|
62195
62393
|
}
|
|
62196
62394
|
case NodeType::N16: {
|
|
62197
|
-
deserialized_node = (Node *)
|
|
62395
|
+
deserialized_node = (Node *)Node16::New();
|
|
62198
62396
|
break;
|
|
62199
62397
|
}
|
|
62200
62398
|
case NodeType::N48: {
|
|
62201
|
-
deserialized_node = (Node *)
|
|
62399
|
+
deserialized_node = (Node *)Node48::New();
|
|
62202
62400
|
break;
|
|
62203
62401
|
}
|
|
62204
62402
|
case NodeType::N256: {
|
|
62205
|
-
deserialized_node = (Node *)
|
|
62403
|
+
deserialized_node = (Node *)Node256::New();
|
|
62206
62404
|
break;
|
|
62207
62405
|
}
|
|
62208
62406
|
}
|
|
@@ -62211,7 +62409,6 @@ Node *Node::Deserialize(ART &art, idx_t block_id, idx_t offset) {
|
|
|
62211
62409
|
}
|
|
62212
62410
|
|
|
62213
62411
|
void UpdateParentsOfNodes(Node *&l_node, Node *&r_node, ParentsOfNodes &parents) {
|
|
62214
|
-
|
|
62215
62412
|
if (parents.l_parent) {
|
|
62216
62413
|
parents.l_parent->ReplaceChildPointer(parents.l_pos, l_node);
|
|
62217
62414
|
}
|
|
@@ -62255,7 +62452,6 @@ bool Merge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
|
|
|
62255
62452
|
}
|
|
62256
62453
|
|
|
62257
62454
|
bool ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &parents) {
|
|
62258
|
-
|
|
62259
62455
|
auto &l_node = info.l_node;
|
|
62260
62456
|
auto &r_node = info.r_node;
|
|
62261
62457
|
Node *null_parent = nullptr;
|
|
@@ -62308,7 +62504,7 @@ bool ResolvePrefixesAndMerge(MergeInfo &info, idx_t depth, ParentsOfNodes &paren
|
|
|
62308
62504
|
// prefixes differ, create new node and insert both nodes as children
|
|
62309
62505
|
|
|
62310
62506
|
// create new node
|
|
62311
|
-
Node *new_node =
|
|
62507
|
+
Node *new_node = Node4::New();
|
|
62312
62508
|
new_node->prefix = Prefix(l_node->prefix, mismatch_pos);
|
|
62313
62509
|
|
|
62314
62510
|
// insert l_node, break up prefix of l_node
|
|
@@ -62435,7 +62631,7 @@ void Node16::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
|
|
|
62435
62631
|
n->count++;
|
|
62436
62632
|
} else {
|
|
62437
62633
|
// Grow to Node48
|
|
62438
|
-
auto new_node =
|
|
62634
|
+
auto new_node = Node48::New();
|
|
62439
62635
|
for (idx_t i = 0; i < node->count; i++) {
|
|
62440
62636
|
new_node->child_index[n->key[i]] = i;
|
|
62441
62637
|
new_node->children[i] = n->children[i];
|
|
@@ -62443,7 +62639,7 @@ void Node16::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
|
|
|
62443
62639
|
}
|
|
62444
62640
|
new_node->prefix = move(n->prefix);
|
|
62445
62641
|
new_node->count = node->count;
|
|
62446
|
-
|
|
62642
|
+
Node::Delete(node);
|
|
62447
62643
|
node = new_node;
|
|
62448
62644
|
|
|
62449
62645
|
Node48::InsertChild(node, key_byte, new_child);
|
|
@@ -62470,14 +62666,14 @@ void Node16::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
62470
62666
|
|
|
62471
62667
|
if (node->count <= 3) {
|
|
62472
62668
|
// Shrink node
|
|
62473
|
-
auto new_node =
|
|
62669
|
+
auto new_node = Node4::New();
|
|
62474
62670
|
for (unsigned i = 0; i < n->count; i++) {
|
|
62475
62671
|
new_node->key[new_node->count] = n->key[i];
|
|
62476
62672
|
new_node->children[new_node->count++] = n->children[i];
|
|
62477
62673
|
n->children[i] = nullptr;
|
|
62478
62674
|
}
|
|
62479
62675
|
new_node->prefix = move(n->prefix);
|
|
62480
|
-
|
|
62676
|
+
Node::Delete(node);
|
|
62481
62677
|
node = new_node;
|
|
62482
62678
|
}
|
|
62483
62679
|
}
|
|
@@ -62570,7 +62766,7 @@ void Node256::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
62570
62766
|
n->children[pos].Reset();
|
|
62571
62767
|
n->count--;
|
|
62572
62768
|
if (node->count <= 36) {
|
|
62573
|
-
auto new_node =
|
|
62769
|
+
auto new_node = Node48::New();
|
|
62574
62770
|
new_node->prefix = move(n->prefix);
|
|
62575
62771
|
for (idx_t i = 0; i < 256; i++) {
|
|
62576
62772
|
if (n->children[i]) {
|
|
@@ -62580,7 +62776,7 @@ void Node256::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
62580
62776
|
new_node->count++;
|
|
62581
62777
|
}
|
|
62582
62778
|
}
|
|
62583
|
-
|
|
62779
|
+
Node::Delete(node);
|
|
62584
62780
|
node = new_node;
|
|
62585
62781
|
}
|
|
62586
62782
|
}
|
|
@@ -62682,7 +62878,7 @@ void Node4::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
|
|
|
62682
62878
|
n->count++;
|
|
62683
62879
|
} else {
|
|
62684
62880
|
// Grow to Node16
|
|
62685
|
-
auto new_node =
|
|
62881
|
+
auto new_node = Node16::New();
|
|
62686
62882
|
new_node->count = 4;
|
|
62687
62883
|
new_node->prefix = move(node->prefix);
|
|
62688
62884
|
for (idx_t i = 0; i < 4; i++) {
|
|
@@ -62691,7 +62887,7 @@ void Node4::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
|
|
|
62691
62887
|
n->children[i] = nullptr;
|
|
62692
62888
|
}
|
|
62693
62889
|
// Delete old node and replace it with new Node16
|
|
62694
|
-
|
|
62890
|
+
Node::Delete(node);
|
|
62695
62891
|
node = new_node;
|
|
62696
62892
|
Node16::InsertChild(node, key_byte, new_child);
|
|
62697
62893
|
}
|
|
@@ -62719,7 +62915,7 @@ void Node4::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
62719
62915
|
// concatenate prefixes
|
|
62720
62916
|
child_ref->prefix.Concatenate(n->key[0], node->prefix);
|
|
62721
62917
|
n->children[0] = nullptr;
|
|
62722
|
-
|
|
62918
|
+
Node::Delete(node);
|
|
62723
62919
|
node = child_ref;
|
|
62724
62920
|
}
|
|
62725
62921
|
}
|
|
@@ -62824,7 +63020,7 @@ void Node48::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
|
|
|
62824
63020
|
n->count++;
|
|
62825
63021
|
} else {
|
|
62826
63022
|
// Grow to Node256
|
|
62827
|
-
auto new_node =
|
|
63023
|
+
auto new_node = Node256::New();
|
|
62828
63024
|
for (idx_t i = 0; i < 256; i++) {
|
|
62829
63025
|
if (n->child_index[i] != Node::EMPTY_MARKER) {
|
|
62830
63026
|
new_node->children[i] = n->children[n->child_index[i]];
|
|
@@ -62833,7 +63029,7 @@ void Node48::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
|
|
|
62833
63029
|
}
|
|
62834
63030
|
new_node->count = n->count;
|
|
62835
63031
|
new_node->prefix = move(n->prefix);
|
|
62836
|
-
|
|
63032
|
+
Node::Delete(node);
|
|
62837
63033
|
node = new_node;
|
|
62838
63034
|
Node256::InsertChild(node, key_byte, new_child);
|
|
62839
63035
|
}
|
|
@@ -62845,7 +63041,7 @@ void Node48::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
62845
63041
|
n->child_index[pos] = Node::EMPTY_MARKER;
|
|
62846
63042
|
n->count--;
|
|
62847
63043
|
if (node->count <= 12) {
|
|
62848
|
-
auto new_node =
|
|
63044
|
+
auto new_node = Node16::New();
|
|
62849
63045
|
new_node->prefix = move(n->prefix);
|
|
62850
63046
|
for (idx_t i = 0; i < 256; i++) {
|
|
62851
63047
|
if (n->child_index[i] != Node::EMPTY_MARKER) {
|
|
@@ -62854,7 +63050,7 @@ void Node48::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
62854
63050
|
n->children[n->child_index[i]] = nullptr;
|
|
62855
63051
|
}
|
|
62856
63052
|
}
|
|
62857
|
-
|
|
63053
|
+
Node::Delete(node);
|
|
62858
63054
|
node = new_node;
|
|
62859
63055
|
}
|
|
62860
63056
|
}
|
|
@@ -62889,12 +63085,38 @@ uint32_t Prefix::Size() const {
|
|
|
62889
63085
|
return size;
|
|
62890
63086
|
}
|
|
62891
63087
|
|
|
63088
|
+
bool Prefix::IsInlined() const {
|
|
63089
|
+
return size <= PREFIX_INLINE_BYTES;
|
|
63090
|
+
}
|
|
63091
|
+
|
|
63092
|
+
uint8_t *Prefix::GetPrefixData() {
|
|
63093
|
+
return IsInlined() ? &value.inlined[0] : value.ptr;
|
|
63094
|
+
}
|
|
63095
|
+
|
|
63096
|
+
const uint8_t *Prefix::GetPrefixData() const {
|
|
63097
|
+
return IsInlined() ? &value.inlined[0] : value.ptr;
|
|
63098
|
+
}
|
|
63099
|
+
|
|
63100
|
+
uint8_t *Prefix::AllocatePrefix(uint32_t size) {
|
|
63101
|
+
Destroy();
|
|
63102
|
+
|
|
63103
|
+
this->size = size;
|
|
63104
|
+
uint8_t *prefix;
|
|
63105
|
+
if (IsInlined()) {
|
|
63106
|
+
prefix = &value.inlined[0];
|
|
63107
|
+
} else {
|
|
63108
|
+
// allocate new prefix
|
|
63109
|
+
value.ptr = AllocateArray<uint8_t>(size);
|
|
63110
|
+
prefix = value.ptr;
|
|
63111
|
+
}
|
|
63112
|
+
return prefix;
|
|
63113
|
+
}
|
|
63114
|
+
|
|
62892
63115
|
Prefix::Prefix() : size(0) {
|
|
62893
63116
|
}
|
|
62894
63117
|
|
|
62895
|
-
Prefix::Prefix(Key &key, uint32_t depth, uint32_t size) : size(
|
|
62896
|
-
|
|
62897
|
-
prefix = unique_ptr<uint8_t[]>(new uint8_t[size]);
|
|
63118
|
+
Prefix::Prefix(Key &key, uint32_t depth, uint32_t size) : size(0) {
|
|
63119
|
+
auto prefix = AllocatePrefix(size);
|
|
62898
63120
|
|
|
62899
63121
|
// copy key to prefix
|
|
62900
63122
|
idx_t prefix_idx = 0;
|
|
@@ -62903,43 +63125,72 @@ Prefix::Prefix(Key &key, uint32_t depth, uint32_t size) : size(size) {
|
|
|
62903
63125
|
}
|
|
62904
63126
|
}
|
|
62905
63127
|
|
|
62906
|
-
Prefix::Prefix(Prefix &other_prefix, uint32_t size) : size(
|
|
62907
|
-
|
|
62908
|
-
prefix = unique_ptr<uint8_t[]>(new uint8_t[size]);
|
|
63128
|
+
Prefix::Prefix(Prefix &other_prefix, uint32_t size) : size(0) {
|
|
63129
|
+
auto prefix = AllocatePrefix(size);
|
|
62909
63130
|
|
|
62910
63131
|
// copy key to Prefix
|
|
63132
|
+
auto other_data = other_prefix.GetPrefixData();
|
|
62911
63133
|
for (idx_t i = 0; i < size; i++) {
|
|
62912
|
-
prefix[i] =
|
|
63134
|
+
prefix[i] = other_data[i];
|
|
63135
|
+
}
|
|
63136
|
+
}
|
|
63137
|
+
|
|
63138
|
+
Prefix::~Prefix() {
|
|
63139
|
+
Destroy();
|
|
63140
|
+
}
|
|
63141
|
+
|
|
63142
|
+
void Prefix::Destroy() {
|
|
63143
|
+
if (!IsInlined()) {
|
|
63144
|
+
DeleteArray<uint8_t>(value.ptr, size);
|
|
63145
|
+
size = 0;
|
|
62913
63146
|
}
|
|
62914
63147
|
}
|
|
62915
63148
|
|
|
62916
63149
|
uint8_t &Prefix::operator[](idx_t idx) {
|
|
62917
63150
|
D_ASSERT(idx < Size());
|
|
62918
|
-
return
|
|
63151
|
+
return GetPrefixData()[idx];
|
|
62919
63152
|
}
|
|
62920
63153
|
|
|
62921
63154
|
Prefix &Prefix::operator=(const Prefix &src) {
|
|
62922
|
-
|
|
62923
|
-
prefix = unique_ptr<uint8_t[]>(new uint8_t[src.size]);
|
|
63155
|
+
auto prefix = AllocatePrefix(src.size);
|
|
62924
63156
|
|
|
62925
63157
|
// copy prefix
|
|
63158
|
+
auto src_prefix = src.GetPrefixData();
|
|
62926
63159
|
for (idx_t i = 0; i < src.size; i++) {
|
|
62927
|
-
prefix[i] =
|
|
63160
|
+
prefix[i] = src_prefix[i];
|
|
62928
63161
|
}
|
|
62929
63162
|
size = src.size;
|
|
62930
63163
|
return *this;
|
|
62931
63164
|
}
|
|
62932
63165
|
|
|
62933
63166
|
Prefix &Prefix::operator=(Prefix &&other) noexcept {
|
|
62934
|
-
|
|
62935
|
-
|
|
63167
|
+
std::swap(size, other.size);
|
|
63168
|
+
std::swap(value, other.value);
|
|
62936
63169
|
return *this;
|
|
62937
63170
|
}
|
|
62938
63171
|
|
|
63172
|
+
void Prefix::Overwrite(uint32_t new_size, uint8_t *data) {
|
|
63173
|
+
if (new_size <= PREFIX_INLINE_BYTES) {
|
|
63174
|
+
// new entry would be inlined
|
|
63175
|
+
// inline the data and destroy the pointer
|
|
63176
|
+
auto prefix = AllocatePrefix(new_size);
|
|
63177
|
+
for (idx_t i = 0; i < new_size; i++) {
|
|
63178
|
+
prefix[i] = data[i];
|
|
63179
|
+
}
|
|
63180
|
+
DeleteArray<uint8_t>(data, new_size);
|
|
63181
|
+
} else {
|
|
63182
|
+
// new entry would not be inlined
|
|
63183
|
+
// take over the data directly
|
|
63184
|
+
Destroy();
|
|
63185
|
+
size = new_size;
|
|
63186
|
+
value.ptr = data;
|
|
63187
|
+
}
|
|
63188
|
+
}
|
|
63189
|
+
|
|
62939
63190
|
void Prefix::Concatenate(uint8_t key, Prefix &other) {
|
|
62940
63191
|
auto new_length = size + 1 + other.size;
|
|
62941
63192
|
// have to allocate space in our prefix array
|
|
62942
|
-
|
|
63193
|
+
auto new_prefix = AllocateArray<uint8_t>(new_length);
|
|
62943
63194
|
idx_t new_prefix_idx = 0;
|
|
62944
63195
|
// 1) add the to-be deleted node's prefix
|
|
62945
63196
|
for (uint32_t i = 0; i < other.size; i++) {
|
|
@@ -62948,42 +63199,46 @@ void Prefix::Concatenate(uint8_t key, Prefix &other) {
|
|
|
62948
63199
|
// 2) now move the current key as part of the prefix
|
|
62949
63200
|
new_prefix[new_prefix_idx++] = key;
|
|
62950
63201
|
// 3) move the existing prefix (if any)
|
|
63202
|
+
auto prefix = GetPrefixData();
|
|
62951
63203
|
for (uint32_t i = 0; i < size; i++) {
|
|
62952
63204
|
new_prefix[new_prefix_idx++] = prefix[i];
|
|
62953
63205
|
}
|
|
62954
|
-
|
|
62955
|
-
size = new_length;
|
|
63206
|
+
Overwrite(new_length, new_prefix);
|
|
62956
63207
|
}
|
|
62957
63208
|
|
|
62958
63209
|
uint8_t Prefix::Reduce(uint32_t n) {
|
|
62959
63210
|
auto new_size = size - n - 1;
|
|
62960
|
-
auto
|
|
63211
|
+
auto prefix = GetPrefixData();
|
|
62961
63212
|
auto key = prefix[n];
|
|
63213
|
+
if (new_size == 0) {
|
|
63214
|
+
Destroy();
|
|
63215
|
+
size = 0;
|
|
63216
|
+
return key;
|
|
63217
|
+
}
|
|
63218
|
+
auto new_prefix = AllocateArray<uint8_t>(new_size);
|
|
62962
63219
|
for (idx_t i = 0; i < new_size; i++) {
|
|
62963
63220
|
new_prefix[i] = prefix[i + n + 1];
|
|
62964
63221
|
}
|
|
62965
|
-
|
|
62966
|
-
size = new_size;
|
|
63222
|
+
Overwrite(new_size, new_prefix);
|
|
62967
63223
|
return key;
|
|
62968
63224
|
}
|
|
62969
63225
|
|
|
62970
63226
|
void Prefix::Serialize(duckdb::MetaBlockWriter &writer) {
|
|
62971
63227
|
writer.Write(size);
|
|
62972
|
-
|
|
62973
|
-
|
|
62974
|
-
}
|
|
63228
|
+
auto prefix = GetPrefixData();
|
|
63229
|
+
writer.WriteData(prefix, size);
|
|
62975
63230
|
}
|
|
62976
63231
|
|
|
62977
63232
|
void Prefix::Deserialize(duckdb::MetaBlockReader &reader) {
|
|
62978
|
-
|
|
62979
|
-
prefix =
|
|
62980
|
-
|
|
62981
|
-
|
|
62982
|
-
}
|
|
63233
|
+
auto prefix_size = reader.Read<uint32_t>();
|
|
63234
|
+
auto prefix = AllocatePrefix(prefix_size);
|
|
63235
|
+
this->size = prefix_size;
|
|
63236
|
+
reader.ReadData(prefix, size);
|
|
62983
63237
|
}
|
|
62984
63238
|
|
|
62985
63239
|
uint32_t Prefix::KeyMismatchPosition(Key &key, uint64_t depth) {
|
|
62986
63240
|
uint64_t pos;
|
|
63241
|
+
auto prefix = GetPrefixData();
|
|
62987
63242
|
for (pos = 0; pos < size; pos++) {
|
|
62988
63243
|
if (key[depth + pos] != prefix[pos]) {
|
|
62989
63244
|
return pos;
|
|
@@ -62993,9 +63248,10 @@ uint32_t Prefix::KeyMismatchPosition(Key &key, uint64_t depth) {
|
|
|
62993
63248
|
}
|
|
62994
63249
|
|
|
62995
63250
|
uint32_t Prefix::MismatchPosition(Prefix &other) {
|
|
62996
|
-
|
|
63251
|
+
auto prefix = GetPrefixData();
|
|
63252
|
+
auto other_data = other.GetPrefixData();
|
|
62997
63253
|
for (idx_t i = 0; i < size; i++) {
|
|
62998
|
-
if (prefix[i] !=
|
|
63254
|
+
if (prefix[i] != other_data[i]) {
|
|
62999
63255
|
return i;
|
|
63000
63256
|
}
|
|
63001
63257
|
}
|
|
@@ -63009,7 +63265,7 @@ namespace duckdb {
|
|
|
63009
63265
|
SwizzleablePointer::~SwizzleablePointer() {
|
|
63010
63266
|
if (pointer) {
|
|
63011
63267
|
if (!IsSwizzled()) {
|
|
63012
|
-
|
|
63268
|
+
Node::Delete((Node *)pointer);
|
|
63013
63269
|
}
|
|
63014
63270
|
}
|
|
63015
63271
|
}
|
|
@@ -63071,7 +63327,7 @@ bool SwizzleablePointer::IsSwizzled() {
|
|
|
63071
63327
|
void SwizzleablePointer::Reset() {
|
|
63072
63328
|
if (pointer) {
|
|
63073
63329
|
if (!IsSwizzled()) {
|
|
63074
|
-
|
|
63330
|
+
Node::Delete((Node *)pointer);
|
|
63075
63331
|
}
|
|
63076
63332
|
}
|
|
63077
63333
|
*this = nullptr;
|
|
@@ -81125,17 +81381,21 @@ bool BufferedCSVReader::TryParseCSV(ParserMode parser_mode, DataChunk &insert_ch
|
|
|
81125
81381
|
|
|
81126
81382
|
namespace duckdb {
|
|
81127
81383
|
|
|
81128
|
-
CSVBuffer::CSVBuffer(idx_t buffer_size_p, CSVFileHandle &file_handle)
|
|
81129
|
-
|
|
81130
|
-
|
|
81384
|
+
CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle)
|
|
81385
|
+
: context(context), first_buffer(true) {
|
|
81386
|
+
this->handle = AllocateBuffer(buffer_size_p);
|
|
81387
|
+
|
|
81388
|
+
auto buffer = Ptr();
|
|
81389
|
+
actual_size = file_handle.Read(buffer, buffer_size_p);
|
|
81131
81390
|
if (actual_size >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF') {
|
|
81132
81391
|
start_position += 3;
|
|
81133
81392
|
}
|
|
81134
81393
|
last_buffer = file_handle.FinishedReading();
|
|
81135
81394
|
}
|
|
81136
81395
|
|
|
81137
|
-
CSVBuffer::CSVBuffer(
|
|
81138
|
-
|
|
81396
|
+
CSVBuffer::CSVBuffer(ClientContext &context, BufferHandle buffer_p, idx_t buffer_size_p, idx_t actual_size_p,
|
|
81397
|
+
bool final_buffer)
|
|
81398
|
+
: context(context), handle(move(buffer_p)), actual_size(actual_size_p), last_buffer(final_buffer) {
|
|
81139
81399
|
}
|
|
81140
81400
|
|
|
81141
81401
|
unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t set_buffer_size) {
|
|
@@ -81144,14 +81404,18 @@ unique_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t set_buff
|
|
|
81144
81404
|
return nullptr;
|
|
81145
81405
|
}
|
|
81146
81406
|
|
|
81147
|
-
auto next_buffer =
|
|
81148
|
-
|
|
81149
|
-
idx_t next_buffer_actual_size = file_handle.Read(next_buffer.get(), set_buffer_size);
|
|
81407
|
+
auto next_buffer = AllocateBuffer(set_buffer_size);
|
|
81408
|
+
idx_t next_buffer_actual_size = file_handle.Read(next_buffer.Ptr(), set_buffer_size);
|
|
81150
81409
|
|
|
81151
|
-
return make_unique<CSVBuffer>(move(next_buffer), set_buffer_size, next_buffer_actual_size,
|
|
81410
|
+
return make_unique<CSVBuffer>(context, move(next_buffer), set_buffer_size, next_buffer_actual_size,
|
|
81152
81411
|
file_handle.FinishedReading());
|
|
81153
81412
|
}
|
|
81154
81413
|
|
|
81414
|
+
BufferHandle CSVBuffer::AllocateBuffer(idx_t buffer_size) {
|
|
81415
|
+
auto &buffer_manager = BufferManager::GetBufferManager(context);
|
|
81416
|
+
return buffer_manager.Allocate(MaxValue<idx_t>(Storage::BLOCK_SIZE, buffer_size));
|
|
81417
|
+
}
|
|
81418
|
+
|
|
81155
81419
|
idx_t CSVBuffer::GetBufferSize() {
|
|
81156
81420
|
return actual_size;
|
|
81157
81421
|
}
|
|
@@ -81202,6 +81466,9 @@ static bool ParseBoolean(const Value &value, const string &loption) {
|
|
|
81202
81466
|
}
|
|
81203
81467
|
|
|
81204
81468
|
static string ParseString(const Value &value, const string &loption) {
|
|
81469
|
+
if (value.IsNull()) {
|
|
81470
|
+
return string();
|
|
81471
|
+
}
|
|
81205
81472
|
if (value.type().id() == LogicalTypeId::LIST) {
|
|
81206
81473
|
auto &children = ListValue::GetChildren(value);
|
|
81207
81474
|
if (children.size() != 1) {
|
|
@@ -81356,6 +81623,11 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
|
|
|
81356
81623
|
ignore_errors = ParseBoolean(value, loption);
|
|
81357
81624
|
} else if (loption == "union_by_name") {
|
|
81358
81625
|
union_by_name = ParseBoolean(value, loption);
|
|
81626
|
+
} else if (loption == "buffer_size") {
|
|
81627
|
+
buffer_size = ParseInteger(value, loption);
|
|
81628
|
+
if (buffer_size == 0) {
|
|
81629
|
+
throw InvalidInputException("Buffer Size option must be higher than 0");
|
|
81630
|
+
}
|
|
81359
81631
|
} else {
|
|
81360
81632
|
throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
|
|
81361
81633
|
}
|
|
@@ -81469,34 +81741,38 @@ struct CSVBufferRead {
|
|
|
81469
81741
|
|
|
81470
81742
|
const char &operator[](size_t i) const {
|
|
81471
81743
|
if (i < buffer->GetBufferSize()) {
|
|
81472
|
-
|
|
81744
|
+
auto buffer_ptr = buffer->Ptr();
|
|
81745
|
+
return buffer_ptr[i];
|
|
81473
81746
|
}
|
|
81474
|
-
|
|
81747
|
+
auto next_ptr = next_buffer->Ptr();
|
|
81748
|
+
return next_ptr[i - buffer->GetBufferSize()];
|
|
81475
81749
|
}
|
|
81476
81750
|
|
|
81477
81751
|
string_t GetValue(idx_t start_buffer, idx_t position_buffer, idx_t offset) {
|
|
81478
81752
|
idx_t length = position_buffer - start_buffer - offset;
|
|
81479
81753
|
// 1) It's all in the current buffer
|
|
81480
81754
|
if (start_buffer + length <= buffer->GetBufferSize()) {
|
|
81481
|
-
auto buffer_ptr = buffer->
|
|
81755
|
+
auto buffer_ptr = buffer->Ptr();
|
|
81482
81756
|
return string_t(buffer_ptr + start_buffer, length);
|
|
81483
81757
|
} else if (start_buffer >= buffer->GetBufferSize()) {
|
|
81484
81758
|
// 2) It's all in the next buffer
|
|
81485
81759
|
D_ASSERT(next_buffer);
|
|
81486
81760
|
D_ASSERT(next_buffer->GetBufferSize() >= length + (start_buffer - buffer->GetBufferSize()));
|
|
81487
|
-
auto buffer_ptr = next_buffer->
|
|
81761
|
+
auto buffer_ptr = next_buffer->Ptr();
|
|
81488
81762
|
return string_t(buffer_ptr + (start_buffer - buffer->GetBufferSize()), length);
|
|
81489
81763
|
} else {
|
|
81490
81764
|
// 3) It starts in the current buffer and ends in the next buffer
|
|
81491
81765
|
D_ASSERT(next_buffer);
|
|
81492
81766
|
auto intersection = unique_ptr<char[]>(new char[length]);
|
|
81493
81767
|
idx_t cur_pos = 0;
|
|
81768
|
+
auto buffer_ptr = buffer->Ptr();
|
|
81494
81769
|
for (idx_t i = start_buffer; i < buffer->GetBufferSize(); i++) {
|
|
81495
|
-
intersection[cur_pos++] =
|
|
81770
|
+
intersection[cur_pos++] = buffer_ptr[i];
|
|
81496
81771
|
}
|
|
81497
81772
|
idx_t nxt_buffer_pos = 0;
|
|
81773
|
+
auto next_buffer_ptr = next_buffer->Ptr();
|
|
81498
81774
|
for (; cur_pos < length; cur_pos++) {
|
|
81499
|
-
intersection[cur_pos] =
|
|
81775
|
+
intersection[cur_pos] = next_buffer_ptr[nxt_buffer_pos++];
|
|
81500
81776
|
}
|
|
81501
81777
|
intersections.emplace_back(move(intersection));
|
|
81502
81778
|
return string_t(intersections.back().get(), length);
|
|
@@ -81809,10 +82085,11 @@ normal : {
|
|
|
81809
82085
|
/* state: normal parsing state */
|
|
81810
82086
|
// this state parses the remainder of a non-quoted value until we reach a delimiter or newline
|
|
81811
82087
|
for (; position_buffer < end_buffer; position_buffer++) {
|
|
81812
|
-
|
|
82088
|
+
auto c = (*buffer)[position_buffer];
|
|
82089
|
+
if (c == options.delimiter[0]) {
|
|
81813
82090
|
// delimiter: end the value and add it to the chunk
|
|
81814
82091
|
goto add_value;
|
|
81815
|
-
} else if (StringUtil::CharacterIsNewline(
|
|
82092
|
+
} else if (StringUtil::CharacterIsNewline(c)) {
|
|
81816
82093
|
// newline: add row
|
|
81817
82094
|
D_ASSERT(try_add_line || column == insert_chunk.ColumnCount() - 1);
|
|
81818
82095
|
goto add_row;
|
|
@@ -81882,10 +82159,11 @@ in_quotes:
|
|
|
81882
82159
|
has_quotes = true;
|
|
81883
82160
|
position_buffer++;
|
|
81884
82161
|
for (; position_buffer < end_buffer; position_buffer++) {
|
|
81885
|
-
|
|
82162
|
+
auto c = (*buffer)[position_buffer];
|
|
82163
|
+
if (c == options.quote[0]) {
|
|
81886
82164
|
// quote: move to unquoted state
|
|
81887
82165
|
goto unquote;
|
|
81888
|
-
} else if (
|
|
82166
|
+
} else if (c == options.escape[0]) {
|
|
81889
82167
|
// escape: store the escaped position and move to handle_escape state
|
|
81890
82168
|
escape_positions.push_back(position_buffer - start_buffer);
|
|
81891
82169
|
goto handle_escape;
|
|
@@ -81907,7 +82185,7 @@ in_quotes:
|
|
|
81907
82185
|
goto in_quotes;
|
|
81908
82186
|
}
|
|
81909
82187
|
|
|
81910
|
-
unquote:
|
|
82188
|
+
unquote : {
|
|
81911
82189
|
/* state: unquote: this state handles the state directly after we unquote*/
|
|
81912
82190
|
//
|
|
81913
82191
|
// in this state we expect either another quote (entering the quoted state again, and escaping the quote)
|
|
@@ -81917,16 +82195,16 @@ unquote:
|
|
|
81917
82195
|
offset = 1;
|
|
81918
82196
|
goto final_state;
|
|
81919
82197
|
}
|
|
81920
|
-
|
|
81921
|
-
|
|
82198
|
+
auto c = (*buffer)[position_buffer];
|
|
82199
|
+
if (c == options.quote[0] && (options.escape.empty() || options.escape[0] == options.quote[0])) {
|
|
81922
82200
|
// escaped quote, return to quoted state and store escape position
|
|
81923
82201
|
escape_positions.push_back(position_buffer - start_buffer);
|
|
81924
82202
|
goto in_quotes;
|
|
81925
|
-
} else if (
|
|
82203
|
+
} else if (c == options.delimiter[0]) {
|
|
81926
82204
|
// delimiter, add value
|
|
81927
82205
|
offset = 1;
|
|
81928
82206
|
goto add_value;
|
|
81929
|
-
} else if (StringUtil::CharacterIsNewline(
|
|
82207
|
+
} else if (StringUtil::CharacterIsNewline(c)) {
|
|
81930
82208
|
offset = 1;
|
|
81931
82209
|
D_ASSERT(column == insert_chunk.ColumnCount() - 1);
|
|
81932
82210
|
goto add_row;
|
|
@@ -81941,6 +82219,7 @@ unquote:
|
|
|
81941
82219
|
options.file_path, GetLineNumberStr(linenr, linenr_estimated).c_str(), options.ToString());
|
|
81942
82220
|
return false;
|
|
81943
82221
|
}
|
|
82222
|
+
}
|
|
81944
82223
|
handle_escape : {
|
|
81945
82224
|
/* state: handle_escape */
|
|
81946
82225
|
// escape should be followed by a quote or another escape character
|
|
@@ -124848,7 +125127,7 @@ void SubstringDetection(string &str_1, string &str_2, const string &name_str_1,
|
|
|
124848
125127
|
if (str_1.empty() || str_2.empty()) {
|
|
124849
125128
|
return;
|
|
124850
125129
|
}
|
|
124851
|
-
if ((str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos)
|
|
125130
|
+
if ((str_1.find(str_2) != string::npos || str_2.find(str_1) != std::string::npos)) {
|
|
124852
125131
|
throw BinderException("%s must not appear in the %s specification and vice versa", name_str_1, name_str_2);
|
|
124853
125132
|
}
|
|
124854
125133
|
}
|
|
@@ -124941,6 +125220,11 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, CopyInfo &in
|
|
|
124941
125220
|
options.force_not_null.resize(expected_types.size(), false);
|
|
124942
125221
|
}
|
|
124943
125222
|
bind_data->FinalizeRead(context);
|
|
125223
|
+
if (!bind_data->single_threaded && options.auto_detect) {
|
|
125224
|
+
options.file_path = bind_data->files[0];
|
|
125225
|
+
auto initial_reader = make_unique<BufferedCSVReader>(context, options);
|
|
125226
|
+
options = initial_reader->options;
|
|
125227
|
+
}
|
|
124944
125228
|
return move(bind_data);
|
|
124945
125229
|
}
|
|
124946
125230
|
|
|
@@ -126083,11 +126367,6 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
|
126083
126367
|
options.include_file_name = BooleanValue::Get(kv.second);
|
|
126084
126368
|
} else if (loption == "hive_partitioning") {
|
|
126085
126369
|
options.include_parsed_hive_partitions = BooleanValue::Get(kv.second);
|
|
126086
|
-
} else if (loption == "buffer_size") {
|
|
126087
|
-
options.buffer_size = kv.second.GetValue<uint64_t>();
|
|
126088
|
-
if (options.buffer_size == 0) {
|
|
126089
|
-
throw InvalidInputException("Buffer Size option must be higher than 0");
|
|
126090
|
-
}
|
|
126091
126370
|
} else {
|
|
126092
126371
|
options.SetReadOption(loption, kv.second, names);
|
|
126093
126372
|
}
|
|
@@ -126106,7 +126385,7 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
|
|
|
126106
126385
|
} else {
|
|
126107
126386
|
D_ASSERT(return_types.size() == names.size());
|
|
126108
126387
|
}
|
|
126109
|
-
options =
|
|
126388
|
+
options = initial_reader->options;
|
|
126110
126389
|
result->sql_types = initial_reader->sql_types;
|
|
126111
126390
|
result->initial_reader = move(initial_reader);
|
|
126112
126391
|
} else {
|
|
@@ -126204,8 +126483,9 @@ static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFun
|
|
|
126204
126483
|
//===--------------------------------------------------------------------===//
|
|
126205
126484
|
struct ParallelCSVGlobalState : public GlobalTableFunctionState {
|
|
126206
126485
|
public:
|
|
126207
|
-
ParallelCSVGlobalState(unique_ptr<CSVFileHandle> file_handle_p,
|
|
126208
|
-
idx_t system_threads_p, idx_t buffer_size_p,
|
|
126486
|
+
ParallelCSVGlobalState(ClientContext &context, unique_ptr<CSVFileHandle> file_handle_p,
|
|
126487
|
+
vector<string> &files_path_p, idx_t system_threads_p, idx_t buffer_size_p,
|
|
126488
|
+
idx_t rows_to_skip)
|
|
126209
126489
|
: file_handle(move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p) {
|
|
126210
126490
|
for (idx_t i = 0; i < rows_to_skip; i++) {
|
|
126211
126491
|
file_handle->ReadLine();
|
|
@@ -126219,7 +126499,7 @@ public:
|
|
|
126219
126499
|
} else {
|
|
126220
126500
|
bytes_per_local_state = file_size / MaxThreads();
|
|
126221
126501
|
}
|
|
126222
|
-
current_buffer = make_shared<CSVBuffer>(buffer_size, *file_handle);
|
|
126502
|
+
current_buffer = make_shared<CSVBuffer>(context, buffer_size, *file_handle);
|
|
126223
126503
|
next_buffer = current_buffer->Next(*file_handle, buffer_size);
|
|
126224
126504
|
}
|
|
126225
126505
|
ParallelCSVGlobalState() {
|
|
@@ -126306,7 +126586,7 @@ unique_ptr<CSVBufferRead> ParallelCSVGlobalState::Next(ClientContext &context, R
|
|
|
126306
126586
|
if (file_index < bind_data.files.size()) {
|
|
126307
126587
|
bind_data.options.file_path = bind_data.files[file_index++];
|
|
126308
126588
|
file_handle = ReadCSV::OpenCSV(bind_data.options, context);
|
|
126309
|
-
next_buffer = make_shared<CSVBuffer>(buffer_size, *file_handle);
|
|
126589
|
+
next_buffer = make_shared<CSVBuffer>(context, buffer_size, *file_handle);
|
|
126310
126590
|
}
|
|
126311
126591
|
}
|
|
126312
126592
|
return result;
|
|
@@ -126327,8 +126607,9 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext
|
|
|
126327
126607
|
file_handle = ReadCSV::OpenCSV(bind_data.options, context);
|
|
126328
126608
|
}
|
|
126329
126609
|
idx_t rows_to_skip = bind_data.options.skip_rows + (bind_data.options.has_header ? 1 : 0);
|
|
126330
|
-
return make_unique<ParallelCSVGlobalState>(move(file_handle), bind_data.files,
|
|
126331
|
-
bind_data.options.buffer_size,
|
|
126610
|
+
return make_unique<ParallelCSVGlobalState>(context, move(file_handle), bind_data.files,
|
|
126611
|
+
context.db->NumberOfThreads(), bind_data.options.buffer_size,
|
|
126612
|
+
rows_to_skip);
|
|
126332
126613
|
}
|
|
126333
126614
|
|
|
126334
126615
|
//===--------------------------------------------------------------------===//
|
|
@@ -138102,6 +138383,9 @@ void DatabaseInstance::Configure(DBConfig &new_config) {
|
|
|
138102
138383
|
if (!config.error_manager) {
|
|
138103
138384
|
config.error_manager = make_unique<ErrorManager>();
|
|
138104
138385
|
}
|
|
138386
|
+
if (!config.default_allocator) {
|
|
138387
|
+
config.default_allocator = Allocator::DefaultAllocatorReference();
|
|
138388
|
+
}
|
|
138105
138389
|
}
|
|
138106
138390
|
|
|
138107
138391
|
DBConfig &DBConfig::GetConfig(ClientContext &context) {
|
|
@@ -202438,6 +202722,22 @@ string BufferManager::InMemoryWarning() {
|
|
|
202438
202722
|
"\nOr set PRAGMA temp_directory='/path/to/tmp.tmp'";
|
|
202439
202723
|
}
|
|
202440
202724
|
|
|
202725
|
+
void BufferManager::ReserveMemory(idx_t size) {
|
|
202726
|
+
if (size == 0) {
|
|
202727
|
+
return;
|
|
202728
|
+
}
|
|
202729
|
+
auto reservation =
|
|
202730
|
+
EvictBlocksOrThrow(size, maximum_memory, nullptr, "failed to reserve memory data of size %lld%s", size);
|
|
202731
|
+
reservation.size = 0;
|
|
202732
|
+
}
|
|
202733
|
+
|
|
202734
|
+
void BufferManager::FreeReservedMemory(idx_t size) {
|
|
202735
|
+
if (size == 0) {
|
|
202736
|
+
return;
|
|
202737
|
+
}
|
|
202738
|
+
current_memory -= size;
|
|
202739
|
+
}
|
|
202740
|
+
|
|
202441
202741
|
//===--------------------------------------------------------------------===//
|
|
202442
202742
|
// Buffer Allocator
|
|
202443
202743
|
//===--------------------------------------------------------------------===//
|