duckdb 0.5.2-dev748.0 → 0.5.2-dev756.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +71 -40
- package/src/duckdb.hpp +685 -685
- package/src/parquet-amalgamation.cpp +35996 -35996
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -1815,16 +1815,19 @@ namespace duckdb {
|
|
|
1815
1815
|
enum class NodeType : uint8_t { NLeaf = 0, N4 = 1, N16 = 2, N48 = 3, N256 = 4 };
|
|
1816
1816
|
class ART;
|
|
1817
1817
|
class Node;
|
|
1818
|
+
|
|
1819
|
+
// Note: SwizzleablePointer assumes top 33 bits of the block_id are 0. Use a different
|
|
1820
|
+
// pointer implementation if that does not hold.
|
|
1818
1821
|
class SwizzleablePointer;
|
|
1822
|
+
using ARTPointer = SwizzleablePointer;
|
|
1819
1823
|
|
|
1820
1824
|
struct InternalType {
|
|
1821
1825
|
explicit InternalType(Node *n);
|
|
1822
1826
|
|
|
1823
|
-
void Set(uint8_t *key_p, uint16_t key_size_p,
|
|
1824
|
-
|
|
1827
|
+
void Set(uint8_t *key_p, uint16_t key_size_p, ARTPointer *children_p, uint16_t children_size_p);
|
|
1825
1828
|
uint8_t *key;
|
|
1826
1829
|
uint16_t key_size;
|
|
1827
|
-
|
|
1830
|
+
ARTPointer *children;
|
|
1828
1831
|
uint16_t children_size;
|
|
1829
1832
|
};
|
|
1830
1833
|
|
|
@@ -2040,29 +2043,43 @@ private:
|
|
|
2040
2043
|
|
|
2041
2044
|
namespace duckdb {
|
|
2042
2045
|
|
|
2046
|
+
class ART;
|
|
2047
|
+
class Node;
|
|
2048
|
+
|
|
2049
|
+
// SwizzleablePointer assumes that the 64-bit blockId always has 0s in the top
|
|
2050
|
+
// 33 bits. It thus uses 8 bytes of memory rather than 12.
|
|
2043
2051
|
class SwizzleablePointer {
|
|
2044
2052
|
public:
|
|
2045
2053
|
~SwizzleablePointer();
|
|
2046
2054
|
explicit SwizzleablePointer(duckdb::MetaBlockReader &reader);
|
|
2047
2055
|
SwizzleablePointer() : pointer(0) {};
|
|
2048
2056
|
|
|
2049
|
-
|
|
2057
|
+
BlockPointer Serialize(ART &art, duckdb::MetaBlockWriter &writer);
|
|
2050
2058
|
|
|
2051
2059
|
//! Transforms from Node* to uint64_t
|
|
2052
2060
|
SwizzleablePointer &operator=(const Node *ptr);
|
|
2061
|
+
|
|
2062
|
+
//! Unswizzle the pointer (if possible)
|
|
2063
|
+
Node *Unswizzle(ART &art);
|
|
2064
|
+
|
|
2065
|
+
operator bool() const {
|
|
2066
|
+
return pointer;
|
|
2067
|
+
}
|
|
2068
|
+
|
|
2069
|
+
//! Deletes the underlying object (if necessary) and set the pointer to null_ptr
|
|
2070
|
+
void Reset();
|
|
2071
|
+
|
|
2072
|
+
private:
|
|
2073
|
+
uint64_t pointer;
|
|
2074
|
+
|
|
2053
2075
|
friend bool operator!=(const SwizzleablePointer &s_ptr, const uint64_t &ptr);
|
|
2054
2076
|
|
|
2055
2077
|
//! Extracts block info from swizzled pointer
|
|
2056
2078
|
BlockPointer GetSwizzledBlockInfo();
|
|
2057
2079
|
//! Checks if pointer is swizzled
|
|
2058
2080
|
bool IsSwizzled();
|
|
2059
|
-
//! Deletes the underlying object (if necessary) and set the pointer to null_ptr
|
|
2060
|
-
void Reset();
|
|
2061
|
-
//! Unswizzle the pointer (if possible)
|
|
2062
|
-
Node *Unswizzle(ART &art);
|
|
2063
|
-
|
|
2064
|
-
BlockPointer Serialize(ART &art, duckdb::MetaBlockWriter &writer);
|
|
2065
2081
|
};
|
|
2082
|
+
|
|
2066
2083
|
} // namespace duckdb
|
|
2067
2084
|
|
|
2068
2085
|
|
|
@@ -2072,7 +2089,7 @@ class Node16 : public Node {
|
|
|
2072
2089
|
public:
|
|
2073
2090
|
explicit Node16();
|
|
2074
2091
|
uint8_t key[16];
|
|
2075
|
-
|
|
2092
|
+
ARTPointer children[16];
|
|
2076
2093
|
|
|
2077
2094
|
public:
|
|
2078
2095
|
//! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
|
|
@@ -2117,7 +2134,7 @@ namespace duckdb {
|
|
|
2117
2134
|
class Node256 : public Node {
|
|
2118
2135
|
public:
|
|
2119
2136
|
explicit Node256();
|
|
2120
|
-
|
|
2137
|
+
ARTPointer children[256];
|
|
2121
2138
|
|
|
2122
2139
|
public:
|
|
2123
2140
|
//! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
|
|
@@ -2163,7 +2180,8 @@ class Node4 : public Node {
|
|
|
2163
2180
|
public:
|
|
2164
2181
|
Node4();
|
|
2165
2182
|
uint8_t key[4];
|
|
2166
|
-
|
|
2183
|
+
// Pointers to the child nodes
|
|
2184
|
+
ARTPointer children[4];
|
|
2167
2185
|
|
|
2168
2186
|
public:
|
|
2169
2187
|
//! Get position of a byte, returns DConstants::INVALID_INDEX if not exists
|
|
@@ -2209,7 +2227,7 @@ class Node48 : public Node {
|
|
|
2209
2227
|
public:
|
|
2210
2228
|
explicit Node48();
|
|
2211
2229
|
uint8_t child_index[256];
|
|
2212
|
-
|
|
2230
|
+
ARTPointer children[48];
|
|
2213
2231
|
|
|
2214
2232
|
public:
|
|
2215
2233
|
//! Get position of a specific byte, returns DConstants::INVALID_INDEX if not exists
|
|
@@ -59427,7 +59445,7 @@ void Iterator::FindMinimum(Node &node) {
|
|
|
59427
59445
|
}
|
|
59428
59446
|
case NodeType::N256: {
|
|
59429
59447
|
auto &n256 = (Node256 &)node;
|
|
59430
|
-
while (!n256.children[pos]
|
|
59448
|
+
while (!n256.children[pos]) {
|
|
59431
59449
|
pos++;
|
|
59432
59450
|
}
|
|
59433
59451
|
cur_key.Push(pos);
|
|
@@ -59788,7 +59806,7 @@ InternalType::InternalType(Node *n) {
|
|
|
59788
59806
|
}
|
|
59789
59807
|
}
|
|
59790
59808
|
|
|
59791
|
-
void InternalType::Set(uint8_t *key_p, uint16_t key_size_p,
|
|
59809
|
+
void InternalType::Set(uint8_t *key_p, uint16_t key_size_p, ARTPointer *children_p, uint16_t children_size_p) {
|
|
59792
59810
|
key = key_p;
|
|
59793
59811
|
key_size = key_size_p;
|
|
59794
59812
|
children = children_p;
|
|
@@ -59969,7 +59987,7 @@ void Node::DeserializeInternal(duckdb::MetaBlockReader &reader) {
|
|
|
59969
59987
|
}
|
|
59970
59988
|
// Get Child offsets
|
|
59971
59989
|
for (idx_t i = 0; i < internal_type.children_size; i++) {
|
|
59972
|
-
internal_type.children[i] =
|
|
59990
|
+
internal_type.children[i] = ARTPointer(reader);
|
|
59973
59991
|
}
|
|
59974
59992
|
}
|
|
59975
59993
|
|
|
@@ -60217,7 +60235,7 @@ void Node16::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
|
|
|
60217
60235
|
while (pos < node->count && n->key[pos] < key_byte) {
|
|
60218
60236
|
pos++;
|
|
60219
60237
|
}
|
|
60220
|
-
if (n->children[pos]
|
|
60238
|
+
if (n->children[pos]) {
|
|
60221
60239
|
for (idx_t i = n->count; i > pos; i--) {
|
|
60222
60240
|
n->key[i] = n->key[i - 1];
|
|
60223
60241
|
n->children[i] = n->children[i - 1];
|
|
@@ -60255,7 +60273,7 @@ void Node16::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
60255
60273
|
}
|
|
60256
60274
|
// set any remaining nodes as nullptr
|
|
60257
60275
|
for (; pos < 16; pos++) {
|
|
60258
|
-
if (!n->children[pos]
|
|
60276
|
+
if (!n->children[pos]) {
|
|
60259
60277
|
break;
|
|
60260
60278
|
}
|
|
60261
60279
|
n->children[pos] = nullptr;
|
|
@@ -60304,7 +60322,7 @@ Node256::Node256() : Node(NodeType::N256) {
|
|
|
60304
60322
|
}
|
|
60305
60323
|
|
|
60306
60324
|
idx_t Node256::GetChildPos(uint8_t k) {
|
|
60307
|
-
if (children[k]
|
|
60325
|
+
if (children[k]) {
|
|
60308
60326
|
return k;
|
|
60309
60327
|
} else {
|
|
60310
60328
|
return DConstants::INVALID_INDEX;
|
|
@@ -60313,7 +60331,7 @@ idx_t Node256::GetChildPos(uint8_t k) {
|
|
|
60313
60331
|
|
|
60314
60332
|
idx_t Node256::GetChildGreaterEqual(uint8_t k, bool &equal) {
|
|
60315
60333
|
for (idx_t pos = k; pos < 256; pos++) {
|
|
60316
|
-
if (children[pos]
|
|
60334
|
+
if (children[pos]) {
|
|
60317
60335
|
if (pos == k) {
|
|
60318
60336
|
equal = true;
|
|
60319
60337
|
} else {
|
|
@@ -60327,7 +60345,7 @@ idx_t Node256::GetChildGreaterEqual(uint8_t k, bool &equal) {
|
|
|
60327
60345
|
|
|
60328
60346
|
idx_t Node256::GetMin() {
|
|
60329
60347
|
for (idx_t i = 0; i < 256; i++) {
|
|
60330
|
-
if (children[i]
|
|
60348
|
+
if (children[i]) {
|
|
60331
60349
|
return i;
|
|
60332
60350
|
}
|
|
60333
60351
|
}
|
|
@@ -60336,7 +60354,7 @@ idx_t Node256::GetMin() {
|
|
|
60336
60354
|
|
|
60337
60355
|
idx_t Node256::GetNextPos(idx_t pos) {
|
|
60338
60356
|
for (pos == DConstants::INVALID_INDEX ? pos = 0 : pos++; pos < 256; pos++) {
|
|
60339
|
-
if (children[pos]
|
|
60357
|
+
if (children[pos]) {
|
|
60340
60358
|
return pos;
|
|
60341
60359
|
}
|
|
60342
60360
|
}
|
|
@@ -60366,7 +60384,7 @@ void Node256::EraseChild(Node *&node, int pos, ART &art) {
|
|
|
60366
60384
|
auto new_node = new Node48();
|
|
60367
60385
|
new_node->prefix = move(n->prefix);
|
|
60368
60386
|
for (idx_t i = 0; i < 256; i++) {
|
|
60369
|
-
if (n->children[i]
|
|
60387
|
+
if (n->children[i]) {
|
|
60370
60388
|
new_node->child_index[i] = new_node->count;
|
|
60371
60389
|
new_node->children[new_node->count] = n->children[i];
|
|
60372
60390
|
n->children[i] = nullptr;
|
|
@@ -60464,7 +60482,7 @@ void Node4::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
|
|
|
60464
60482
|
while ((pos < node->count) && (n->key[pos] < key_byte)) {
|
|
60465
60483
|
pos++;
|
|
60466
60484
|
}
|
|
60467
|
-
if (n->children[pos]
|
|
60485
|
+
if (n->children[pos]) {
|
|
60468
60486
|
for (idx_t i = n->count; i > pos; i--) {
|
|
60469
60487
|
n->key[i] = n->key[i - 1];
|
|
60470
60488
|
n->children[i] = n->children[i - 1];
|
|
@@ -60605,10 +60623,10 @@ void Node48::InsertChild(Node *&node, uint8_t key_byte, Node *new_child) {
|
|
|
60605
60623
|
if (node->count < 48) {
|
|
60606
60624
|
// Insert element
|
|
60607
60625
|
idx_t pos = n->count;
|
|
60608
|
-
if (n->children[pos]
|
|
60626
|
+
if (n->children[pos]) {
|
|
60609
60627
|
// find an empty position in the node list if the current position is occupied
|
|
60610
60628
|
pos = 0;
|
|
60611
|
-
while (n->children[pos]
|
|
60629
|
+
while (n->children[pos]) {
|
|
60612
60630
|
pos++;
|
|
60613
60631
|
}
|
|
60614
60632
|
}
|
|
@@ -60816,15 +60834,23 @@ SwizzleablePointer::SwizzleablePointer(duckdb::MetaBlockReader &reader) {
|
|
|
60816
60834
|
}
|
|
60817
60835
|
idx_t pointer_size = sizeof(pointer) * 8;
|
|
60818
60836
|
pointer = block_id;
|
|
60837
|
+
// This assumes high 32 bits of pointer are zero.
|
|
60819
60838
|
pointer = pointer << (pointer_size / 2);
|
|
60839
|
+
D_ASSERT((pointer >> (pointer_size / 2)) == block_id);
|
|
60820
60840
|
pointer += offset;
|
|
60821
60841
|
// Set the left most bit to indicate this is a swizzled pointer and send it back to the mother-ship
|
|
60822
60842
|
uint64_t mask = 1;
|
|
60823
60843
|
mask = mask << (pointer_size - 1);
|
|
60844
|
+
// This assumes the 33rd most significant bit of the block_id is zero.
|
|
60824
60845
|
pointer |= mask;
|
|
60825
60846
|
}
|
|
60826
60847
|
|
|
60827
60848
|
SwizzleablePointer &SwizzleablePointer::operator=(const Node *ptr) {
|
|
60849
|
+
// If the object already has a non-swizzled pointer, this will leak memory.
|
|
60850
|
+
//
|
|
60851
|
+
// TODO: If enabled, this assert will fire, indicating a possible leak. If an exception
|
|
60852
|
+
// is thrown here, it will cause a double-free. There is some work to do to make all this safer.
|
|
60853
|
+
// D_ASSERT(empty() || IsSwizzled());
|
|
60828
60854
|
if (sizeof(ptr) == 4) {
|
|
60829
60855
|
pointer = (uint32_t)(size_t)ptr;
|
|
60830
60856
|
} else {
|
|
@@ -60840,11 +60866,14 @@ bool operator!=(const SwizzleablePointer &s_ptr, const uint64_t &ptr) {
|
|
|
60840
60866
|
BlockPointer SwizzleablePointer::GetSwizzledBlockInfo() {
|
|
60841
60867
|
D_ASSERT(IsSwizzled());
|
|
60842
60868
|
idx_t pointer_size = sizeof(pointer) * 8;
|
|
60869
|
+
// This is destructive. Pointer will be invalid after this operation.
|
|
60870
|
+
// That's okay because this is only ever called from Unswizzle.
|
|
60843
60871
|
pointer = pointer & ~(1ULL << (pointer_size - 1));
|
|
60844
60872
|
uint32_t block_id = pointer >> (pointer_size / 2);
|
|
60845
60873
|
uint32_t offset = pointer & 0xffffffff;
|
|
60846
60874
|
return {block_id, offset};
|
|
60847
60875
|
}
|
|
60876
|
+
|
|
60848
60877
|
bool SwizzleablePointer::IsSwizzled() {
|
|
60849
60878
|
idx_t pointer_size = sizeof(pointer) * 8;
|
|
60850
60879
|
return (pointer >> (pointer_size - 1)) & 1;
|
|
@@ -60877,6 +60906,7 @@ BlockPointer SwizzleablePointer::Serialize(ART &art, duckdb::MetaBlockWriter &wr
|
|
|
60877
60906
|
return {(block_id_t)DConstants::INVALID_INDEX, (uint32_t)DConstants::INVALID_INDEX};
|
|
60878
60907
|
}
|
|
60879
60908
|
}
|
|
60909
|
+
|
|
60880
60910
|
} // namespace duckdb
|
|
60881
60911
|
//===----------------------------------------------------------------------===//
|
|
60882
60912
|
// DuckDB
|
|
@@ -101058,9 +101088,16 @@ void DatePartFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
101058
101088
|
|
|
101059
101089
|
|
|
101060
101090
|
|
|
101091
|
+
|
|
101061
101092
|
namespace duckdb {
|
|
101062
101093
|
|
|
101063
101094
|
struct DateSub {
|
|
101095
|
+
static int64_t SubtractMicros(timestamp_t startdate, timestamp_t enddate) {
|
|
101096
|
+
const auto start = Timestamp::GetEpochMicroSeconds(startdate);
|
|
101097
|
+
const auto end = Timestamp::GetEpochMicroSeconds(enddate);
|
|
101098
|
+
return SubtractOperatorOverflowCheck::Operation<int64_t, int64_t, int64_t>(end, start);
|
|
101099
|
+
}
|
|
101100
|
+
|
|
101064
101101
|
template <class TA, class TB, class TR, class OP>
|
|
101065
101102
|
static inline void BinaryExecute(Vector &left, Vector &right, Vector &result, idx_t count) {
|
|
101066
101103
|
BinaryExecutor::ExecuteWithNulls<TA, TB, TR>(
|
|
@@ -101147,55 +101184,49 @@ struct DateSub {
|
|
|
101147
101184
|
struct DayOperator {
|
|
101148
101185
|
template <class TA, class TB, class TR>
|
|
101149
101186
|
static inline TR Operation(TA startdate, TB enddate) {
|
|
101150
|
-
return (
|
|
101151
|
-
Interval::MICROS_PER_DAY;
|
|
101187
|
+
return SubtractMicros(startdate, enddate) / Interval::MICROS_PER_DAY;
|
|
101152
101188
|
}
|
|
101153
101189
|
};
|
|
101154
101190
|
|
|
101155
101191
|
struct WeekOperator {
|
|
101156
101192
|
template <class TA, class TB, class TR>
|
|
101157
101193
|
static inline TR Operation(TA startdate, TB enddate) {
|
|
101158
|
-
return (
|
|
101159
|
-
Interval::MICROS_PER_WEEK;
|
|
101194
|
+
return SubtractMicros(startdate, enddate) / Interval::MICROS_PER_WEEK;
|
|
101160
101195
|
}
|
|
101161
101196
|
};
|
|
101162
101197
|
|
|
101163
101198
|
struct MicrosecondsOperator {
|
|
101164
101199
|
template <class TA, class TB, class TR>
|
|
101165
101200
|
static inline TR Operation(TA startdate, TB enddate) {
|
|
101166
|
-
return (
|
|
101201
|
+
return SubtractMicros(startdate, enddate);
|
|
101167
101202
|
}
|
|
101168
101203
|
};
|
|
101169
101204
|
|
|
101170
101205
|
struct MillisecondsOperator {
|
|
101171
101206
|
template <class TA, class TB, class TR>
|
|
101172
101207
|
static inline TR Operation(TA startdate, TB enddate) {
|
|
101173
|
-
return (
|
|
101174
|
-
Interval::MICROS_PER_MSEC;
|
|
101208
|
+
return SubtractMicros(startdate, enddate) / Interval::MICROS_PER_MSEC;
|
|
101175
101209
|
}
|
|
101176
101210
|
};
|
|
101177
101211
|
|
|
101178
101212
|
struct SecondsOperator {
|
|
101179
101213
|
template <class TA, class TB, class TR>
|
|
101180
101214
|
static inline TR Operation(TA startdate, TB enddate) {
|
|
101181
|
-
return (
|
|
101182
|
-
Interval::MICROS_PER_SEC;
|
|
101215
|
+
return SubtractMicros(startdate, enddate) / Interval::MICROS_PER_SEC;
|
|
101183
101216
|
}
|
|
101184
101217
|
};
|
|
101185
101218
|
|
|
101186
101219
|
struct MinutesOperator {
|
|
101187
101220
|
template <class TA, class TB, class TR>
|
|
101188
101221
|
static inline TR Operation(TA startdate, TB enddate) {
|
|
101189
|
-
return (
|
|
101190
|
-
Interval::MICROS_PER_MINUTE;
|
|
101222
|
+
return SubtractMicros(startdate, enddate) / Interval::MICROS_PER_MINUTE;
|
|
101191
101223
|
}
|
|
101192
101224
|
};
|
|
101193
101225
|
|
|
101194
101226
|
struct HoursOperator {
|
|
101195
101227
|
template <class TA, class TB, class TR>
|
|
101196
101228
|
static inline TR Operation(TA startdate, TB enddate) {
|
|
101197
|
-
return (
|
|
101198
|
-
Interval::MICROS_PER_HOUR;
|
|
101229
|
+
return SubtractMicros(startdate, enddate) / Interval::MICROS_PER_HOUR;
|
|
101199
101230
|
}
|
|
101200
101231
|
};
|
|
101201
101232
|
};
|