duckdb 0.8.2-dev1764.0 → 0.8.2-dev1859.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +38 -22
- package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -4
- package/src/duckdb/src/common/constants.cpp +2 -1
- package/src/duckdb/src/common/enum_util.cpp +5 -5
- package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +8 -0
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +5 -0
- package/src/duckdb/src/common/types/string_heap.cpp +4 -0
- package/src/duckdb/src/core_functions/function_list.cpp +2 -0
- package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
- package/src/duckdb/src/execution/index/art/art.cpp +49 -108
- package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +10 -14
- package/src/duckdb/src/execution/index/art/iterator.cpp +13 -19
- package/src/duckdb/src/execution/index/art/leaf.cpp +290 -241
- package/src/duckdb/src/execution/index/art/node.cpp +104 -95
- package/src/duckdb/src/execution/index/art/node16.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node256.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node4.cpp +6 -6
- package/src/duckdb/src/execution/index/art/node48.cpp +6 -6
- package/src/duckdb/src/execution/index/art/prefix.cpp +49 -39
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +34 -1175
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +4 -14
- package/src/duckdb/src/execution/window_executor.cpp +1280 -0
- package/src/duckdb/src/execution/window_segment_tree.cpp +224 -117
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +43 -40
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +119 -40
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +4 -2
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +60 -53
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
- package/src/duckdb/src/parser/parser.cpp +43 -38
- package/src/duckdb/src/storage/arena_allocator.cpp +12 -0
- package/src/duckdb/src/storage/compression/rle.cpp +52 -12
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_src_execution.cpp +2 -0
- package/src/duckdb/ub_src_execution_index_art.cpp +0 -4
- package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
- package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
- package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
@@ -166,8 +166,8 @@ struct RLECompressState : public CompressionState {
|
|
166
166
|
void WriteValue(T value, rle_count_t count, bool is_null) {
|
167
167
|
// write the RLE entry
|
168
168
|
auto handle_ptr = handle.Ptr() + RLEConstants::RLE_HEADER_SIZE;
|
169
|
-
auto data_pointer =
|
170
|
-
auto index_pointer =
|
169
|
+
auto data_pointer = reinterpret_cast<T *>(handle_ptr);
|
170
|
+
auto index_pointer = reinterpret_cast<rle_count_t *>(handle_ptr + max_rle_count * sizeof(T));
|
171
171
|
data_pointer[entry_count] = value;
|
172
172
|
index_pointer[entry_count] = count;
|
173
173
|
entry_count++;
|
@@ -257,7 +257,7 @@ struct RLEScanState : public SegmentScanState {
|
|
257
257
|
|
258
258
|
void Skip(ColumnSegment &segment, idx_t skip_count) {
|
259
259
|
auto data = handle.Ptr() + segment.GetBlockOffset();
|
260
|
-
auto index_pointer =
|
260
|
+
auto index_pointer = reinterpret_cast<rle_count_t *>(data + rle_count_offset);
|
261
261
|
|
262
262
|
for (idx_t i = 0; i < skip_count; i++) {
|
263
263
|
// assign the current value
|
@@ -292,14 +292,58 @@ void RLESkip(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count) {
|
|
292
292
|
scan_state.Skip(segment, skip_count);
|
293
293
|
}
|
294
294
|
|
295
|
+
static bool CanEmitConstantVector(idx_t position, idx_t run_length, idx_t scan_count) {
|
296
|
+
if (scan_count != STANDARD_VECTOR_SIZE) {
|
297
|
+
// Only when we can fill an entire Vector can we emit a ConstantVector, because subsequent scans require the
|
298
|
+
// input Vector to be flat
|
299
|
+
return false;
|
300
|
+
}
|
301
|
+
D_ASSERT(position < run_length);
|
302
|
+
auto remaining_in_run = run_length - position;
|
303
|
+
// The amount of values left in this run are equal or greater than the amount of values we need to scan
|
304
|
+
return remaining_in_run >= scan_count;
|
305
|
+
}
|
306
|
+
|
307
|
+
template <class T>
|
308
|
+
inline static void ForwardToNextRun(RLEScanState<T> &scan_state) {
|
309
|
+
// handled all entries in this RLE value
|
310
|
+
// move to the next entry
|
311
|
+
scan_state.entry_pos++;
|
312
|
+
scan_state.position_in_entry = 0;
|
313
|
+
}
|
314
|
+
|
315
|
+
template <class T>
|
316
|
+
inline static bool ExhaustedRun(RLEScanState<T> &scan_state, rle_count_t *index_pointer) {
|
317
|
+
return scan_state.position_in_entry >= index_pointer[scan_state.entry_pos];
|
318
|
+
}
|
319
|
+
|
320
|
+
template <class T>
|
321
|
+
static void RLEScanConstant(RLEScanState<T> &scan_state, rle_count_t *index_pointer, T *data_pointer, idx_t scan_count,
|
322
|
+
Vector &result) {
|
323
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
324
|
+
auto result_data = ConstantVector::GetData<T>(result);
|
325
|
+
result_data[0] = data_pointer[scan_state.entry_pos];
|
326
|
+
scan_state.position_in_entry += scan_count;
|
327
|
+
if (ExhaustedRun(scan_state, index_pointer)) {
|
328
|
+
ForwardToNextRun(scan_state);
|
329
|
+
}
|
330
|
+
return;
|
331
|
+
}
|
332
|
+
|
295
333
|
template <class T>
|
296
334
|
void RLEScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result,
|
297
335
|
idx_t result_offset) {
|
298
336
|
auto &scan_state = state.scan_state->Cast<RLEScanState<T>>();
|
299
337
|
|
300
338
|
auto data = scan_state.handle.Ptr() + segment.GetBlockOffset();
|
301
|
-
auto data_pointer =
|
302
|
-
auto index_pointer =
|
339
|
+
auto data_pointer = reinterpret_cast<T *>(data + RLEConstants::RLE_HEADER_SIZE);
|
340
|
+
auto index_pointer = reinterpret_cast<rle_count_t *>(data + scan_state.rle_count_offset);
|
341
|
+
|
342
|
+
// If we are scanning an entire Vector and it contains only a single run
|
343
|
+
if (CanEmitConstantVector(scan_state.position_in_entry, index_pointer[scan_state.entry_pos], scan_count)) {
|
344
|
+
RLEScanConstant<T>(scan_state, index_pointer, data_pointer, scan_count, result);
|
345
|
+
return;
|
346
|
+
}
|
303
347
|
|
304
348
|
auto result_data = FlatVector::GetData<T>(result);
|
305
349
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
@@ -307,18 +351,14 @@ void RLEScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_c
|
|
307
351
|
// assign the current value
|
308
352
|
result_data[result_offset + i] = data_pointer[scan_state.entry_pos];
|
309
353
|
scan_state.position_in_entry++;
|
310
|
-
if (scan_state
|
311
|
-
|
312
|
-
// move to the next entry
|
313
|
-
scan_state.entry_pos++;
|
314
|
-
scan_state.position_in_entry = 0;
|
354
|
+
if (ExhaustedRun(scan_state, index_pointer)) {
|
355
|
+
ForwardToNextRun(scan_state);
|
315
356
|
}
|
316
357
|
}
|
317
358
|
}
|
318
359
|
|
319
360
|
template <class T>
|
320
361
|
void RLEScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) {
|
321
|
-
// FIXME: emit constant vector if repetition of single value is >= scan_count
|
322
362
|
RLEScanPartial<T>(segment, state, scan_count, result, 0);
|
323
363
|
}
|
324
364
|
|
@@ -331,7 +371,7 @@ void RLEFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id,
|
|
331
371
|
scan_state.Skip(segment, row_id);
|
332
372
|
|
333
373
|
auto data = scan_state.handle.Ptr() + segment.GetBlockOffset();
|
334
|
-
auto data_pointer =
|
374
|
+
auto data_pointer = reinterpret_cast<T *>(data + RLEConstants::RLE_HEADER_SIZE);
|
335
375
|
auto result_data = FlatVector::GetData<T>(result);
|
336
376
|
result_data[result_idx] = data_pointer[scan_state.entry_pos];
|
337
377
|
}
|
@@ -8,8 +8,6 @@
|
|
8
8
|
|
9
9
|
#include "src/execution/index/art/leaf.cpp"
|
10
10
|
|
11
|
-
#include "src/execution/index/art/leaf_segment.cpp"
|
12
|
-
|
13
11
|
#include "src/execution/index/art/node4.cpp"
|
14
12
|
|
15
13
|
#include "src/execution/index/art/node16.cpp"
|
@@ -18,8 +16,6 @@
|
|
18
16
|
|
19
17
|
#include "src/execution/index/art/node256.cpp"
|
20
18
|
|
21
|
-
#include "src/execution/index/art/swizzleable_pointer.cpp"
|
22
|
-
|
23
19
|
#include "src/execution/index/art/prefix.cpp"
|
24
20
|
|
25
21
|
#include "src/execution/index/art/art.cpp"
|
@@ -1,52 +0,0 @@
|
|
1
|
-
#include "duckdb/execution/index/art/leaf_segment.hpp"
|
2
|
-
|
3
|
-
#include "duckdb/execution/index/art/art.hpp"
|
4
|
-
#include "duckdb/execution/index/art/node.hpp"
|
5
|
-
|
6
|
-
namespace duckdb {
|
7
|
-
|
8
|
-
LeafSegment &LeafSegment::New(ART &art, Node &node) {
|
9
|
-
|
10
|
-
node.SetPtr(Node::GetAllocator(art, NType::LEAF_SEGMENT).New());
|
11
|
-
node.type = (uint8_t)NType::LEAF_SEGMENT;
|
12
|
-
|
13
|
-
auto &segment = LeafSegment::Get(art, node);
|
14
|
-
segment.next.Reset();
|
15
|
-
return segment;
|
16
|
-
}
|
17
|
-
|
18
|
-
void LeafSegment::Free(ART &art, Node &node) {
|
19
|
-
|
20
|
-
D_ASSERT(node.IsSet());
|
21
|
-
D_ASSERT(!node.IsSwizzled());
|
22
|
-
|
23
|
-
// free next segment
|
24
|
-
auto next_segment = LeafSegment::Get(art, node).next;
|
25
|
-
Node::Free(art, next_segment);
|
26
|
-
}
|
27
|
-
|
28
|
-
LeafSegment &LeafSegment::Append(ART &art, uint32_t &count, const row_t row_id) {
|
29
|
-
|
30
|
-
reference<LeafSegment> segment(*this);
|
31
|
-
auto position = count % Node::LEAF_SEGMENT_SIZE;
|
32
|
-
|
33
|
-
// we need a new segment
|
34
|
-
if (position == 0 && count != 0) {
|
35
|
-
segment = LeafSegment::New(art, next);
|
36
|
-
}
|
37
|
-
|
38
|
-
segment.get().row_ids[position] = row_id;
|
39
|
-
count++;
|
40
|
-
return segment.get();
|
41
|
-
}
|
42
|
-
|
43
|
-
LeafSegment &LeafSegment::GetTail(const ART &art) {
|
44
|
-
|
45
|
-
reference<LeafSegment> segment(*this);
|
46
|
-
while (segment.get().next.IsSet()) {
|
47
|
-
segment = LeafSegment::Get(art, segment.get().next);
|
48
|
-
}
|
49
|
-
return segment.get();
|
50
|
-
}
|
51
|
-
|
52
|
-
} // namespace duckdb
|
@@ -1,22 +0,0 @@
|
|
1
|
-
#include "duckdb/execution/index/art/swizzleable_pointer.hpp"
|
2
|
-
|
3
|
-
#include "duckdb/storage/meta_block_reader.hpp"
|
4
|
-
|
5
|
-
namespace duckdb {
|
6
|
-
|
7
|
-
SwizzleablePointer::SwizzleablePointer(MetaBlockReader &reader) {
|
8
|
-
|
9
|
-
idx_t block_id = reader.Read<block_id_t>();
|
10
|
-
offset = reader.Read<uint32_t>();
|
11
|
-
type = 0;
|
12
|
-
|
13
|
-
if (block_id == DConstants::INVALID_INDEX) {
|
14
|
-
swizzle_flag = 0;
|
15
|
-
return;
|
16
|
-
}
|
17
|
-
|
18
|
-
buffer_id = (uint32_t)block_id;
|
19
|
-
swizzle_flag = 1;
|
20
|
-
}
|
21
|
-
|
22
|
-
} // namespace duckdb
|
@@ -1,38 +0,0 @@
|
|
1
|
-
//===----------------------------------------------------------------------===//
|
2
|
-
// DuckDB
|
3
|
-
//
|
4
|
-
// duckdb/execution/index/art/leaf_segment.hpp
|
5
|
-
//
|
6
|
-
//
|
7
|
-
//===----------------------------------------------------------------------===//
|
8
|
-
#pragma once
|
9
|
-
|
10
|
-
#include "duckdb/execution/index/art/art.hpp"
|
11
|
-
#include "duckdb/execution/index/art/node.hpp"
|
12
|
-
|
13
|
-
namespace duckdb {
|
14
|
-
|
15
|
-
class LeafSegment {
|
16
|
-
public:
|
17
|
-
//! The row IDs stored in this segment
|
18
|
-
row_t row_ids[Node::LEAF_SEGMENT_SIZE];
|
19
|
-
//! The pointer of the next segment, if the row IDs exceeds this segment
|
20
|
-
Node next;
|
21
|
-
|
22
|
-
public:
|
23
|
-
//! Get a new leaf segment node, might cause a new buffer allocation, and initialize it
|
24
|
-
static LeafSegment &New(ART &art, Node &node);
|
25
|
-
//! Get a reference to the leaf segment
|
26
|
-
static inline LeafSegment &Get(const ART &art, const Node ptr) {
|
27
|
-
return *Node::GetAllocator(art, NType::LEAF_SEGMENT).Get<LeafSegment>(ptr);
|
28
|
-
}
|
29
|
-
//! Free the leaf segment and any subsequent ones
|
30
|
-
static void Free(ART &art, Node &node);
|
31
|
-
|
32
|
-
//! Append a row ID to the current segment, or create a new segment containing that row ID
|
33
|
-
LeafSegment &Append(ART &art, uint32_t &count, const row_t row_id);
|
34
|
-
//! Get the tail of a list of segments
|
35
|
-
LeafSegment &GetTail(const ART &art);
|
36
|
-
};
|
37
|
-
|
38
|
-
} // namespace duckdb
|
@@ -1,58 +0,0 @@
|
|
1
|
-
//===----------------------------------------------------------------------===//
|
2
|
-
// DuckDB
|
3
|
-
//
|
4
|
-
// duckdb/execution/index/art/swizzleable_pointer.hpp
|
5
|
-
//
|
6
|
-
//
|
7
|
-
//===----------------------------------------------------------------------===//
|
8
|
-
#pragma once
|
9
|
-
|
10
|
-
#include "duckdb/common/constants.hpp"
|
11
|
-
|
12
|
-
namespace duckdb {
|
13
|
-
|
14
|
-
// classes
|
15
|
-
class MetaBlockReader;
|
16
|
-
|
17
|
-
// structs
|
18
|
-
struct BlockPointer;
|
19
|
-
|
20
|
-
//! SwizzleablePointer provides functions on a (possibly) swizzled pointer. If the swizzle flag is set, then the
|
21
|
-
//! pointer points to a storage address (and has no type), otherwise the pointer has a type and stores
|
22
|
-
//! other information (e.g., a buffer location)
|
23
|
-
class SwizzleablePointer {
|
24
|
-
public:
|
25
|
-
//! Constructs an empty SwizzleablePointer
|
26
|
-
SwizzleablePointer() : swizzle_flag(0), type(0), offset(0), buffer_id(0) {};
|
27
|
-
//! Constructs a swizzled pointer from a buffer ID and an offset
|
28
|
-
explicit SwizzleablePointer(MetaBlockReader &reader);
|
29
|
-
//! Constructs a non-swizzled pointer from a buffer ID and an offset
|
30
|
-
SwizzleablePointer(uint32_t offset, uint32_t buffer_id)
|
31
|
-
: swizzle_flag(0), type(0), offset(offset), buffer_id(buffer_id) {};
|
32
|
-
|
33
|
-
//! The swizzle flag, set if swizzled, not set otherwise
|
34
|
-
uint8_t swizzle_flag : 1;
|
35
|
-
//! The type of the pointer, zero if not set
|
36
|
-
uint8_t type : 7;
|
37
|
-
//! The offset of a memory location
|
38
|
-
uint32_t offset : 24;
|
39
|
-
//! The buffer ID of a memory location
|
40
|
-
uint32_t buffer_id : 32;
|
41
|
-
|
42
|
-
public:
|
43
|
-
//! Checks if the pointer is swizzled
|
44
|
-
inline bool IsSwizzled() const {
|
45
|
-
return swizzle_flag;
|
46
|
-
}
|
47
|
-
//! Returns true, if neither the swizzle flag nor the type is set, and false otherwise
|
48
|
-
inline bool IsSet() const {
|
49
|
-
return swizzle_flag || type;
|
50
|
-
}
|
51
|
-
//! Reset the pointer
|
52
|
-
inline void Reset() {
|
53
|
-
swizzle_flag = 0;
|
54
|
-
type = 0;
|
55
|
-
}
|
56
|
-
};
|
57
|
-
|
58
|
-
} // namespace duckdb
|