duckdb 0.8.2-dev1764.0 → 0.8.2-dev1859.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/README.md +7 -0
  2. package/binding.gyp +1 -0
  3. package/package.json +1 -1
  4. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -0
  5. package/src/duckdb/extension/parquet/parquet_extension.cpp +38 -22
  6. package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -4
  7. package/src/duckdb/src/common/constants.cpp +2 -1
  8. package/src/duckdb/src/common/enum_util.cpp +5 -5
  9. package/src/duckdb/src/common/sort/sort_state.cpp +1 -1
  10. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  11. package/src/duckdb/src/common/types/column/column_data_collection.cpp +8 -0
  12. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +5 -0
  13. package/src/duckdb/src/common/types/string_heap.cpp +4 -0
  14. package/src/duckdb/src/core_functions/function_list.cpp +2 -0
  15. package/src/duckdb/src/core_functions/scalar/debug/vector_type.cpp +23 -0
  16. package/src/duckdb/src/execution/index/art/art.cpp +49 -108
  17. package/src/duckdb/src/execution/index/art/art_key.cpp +0 -11
  18. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +10 -14
  19. package/src/duckdb/src/execution/index/art/iterator.cpp +13 -19
  20. package/src/duckdb/src/execution/index/art/leaf.cpp +290 -241
  21. package/src/duckdb/src/execution/index/art/node.cpp +104 -95
  22. package/src/duckdb/src/execution/index/art/node16.cpp +6 -6
  23. package/src/duckdb/src/execution/index/art/node256.cpp +6 -6
  24. package/src/duckdb/src/execution/index/art/node4.cpp +6 -6
  25. package/src/duckdb/src/execution/index/art/node48.cpp +6 -6
  26. package/src/duckdb/src/execution/index/art/prefix.cpp +49 -39
  27. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +34 -1175
  28. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +4 -14
  29. package/src/duckdb/src/execution/window_executor.cpp +1280 -0
  30. package/src/duckdb/src/execution/window_segment_tree.cpp +224 -117
  31. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  32. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  33. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  34. package/src/duckdb/src/include/duckdb/common/typedefs.hpp +8 -0
  35. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +10 -0
  36. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +3 -0
  37. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -0
  38. package/src/duckdb/src/include/duckdb/common/types/string_heap.hpp +3 -0
  39. package/src/duckdb/src/include/duckdb/core_functions/scalar/debug_functions.hpp +27 -0
  40. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +0 -1
  42. package/src/duckdb/src/include/duckdb/execution/index/art/fixed_size_allocator.hpp +22 -24
  43. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -2
  44. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +43 -40
  45. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +119 -40
  46. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +1 -0
  47. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +1 -0
  49. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +1 -0
  50. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +4 -2
  51. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +313 -0
  52. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +60 -53
  53. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
  54. package/src/duckdb/src/parser/parser.cpp +43 -38
  55. package/src/duckdb/src/storage/arena_allocator.cpp +12 -0
  56. package/src/duckdb/src/storage/compression/rle.cpp +52 -12
  57. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +2 -0
  58. package/src/duckdb/ub_src_execution.cpp +2 -0
  59. package/src/duckdb/ub_src_execution_index_art.cpp +0 -4
  60. package/src/duckdb/src/execution/index/art/leaf_segment.cpp +0 -52
  61. package/src/duckdb/src/execution/index/art/swizzleable_pointer.cpp +0 -22
  62. package/src/duckdb/src/include/duckdb/execution/index/art/leaf_segment.hpp +0 -38
  63. package/src/duckdb/src/include/duckdb/execution/index/art/swizzleable_pointer.hpp +0 -58
@@ -166,8 +166,8 @@ struct RLECompressState : public CompressionState {
166
166
  void WriteValue(T value, rle_count_t count, bool is_null) {
167
167
  // write the RLE entry
168
168
  auto handle_ptr = handle.Ptr() + RLEConstants::RLE_HEADER_SIZE;
169
- auto data_pointer = (T *)handle_ptr;
170
- auto index_pointer = (rle_count_t *)(handle_ptr + max_rle_count * sizeof(T));
169
+ auto data_pointer = reinterpret_cast<T *>(handle_ptr);
170
+ auto index_pointer = reinterpret_cast<rle_count_t *>(handle_ptr + max_rle_count * sizeof(T));
171
171
  data_pointer[entry_count] = value;
172
172
  index_pointer[entry_count] = count;
173
173
  entry_count++;
@@ -257,7 +257,7 @@ struct RLEScanState : public SegmentScanState {
257
257
 
258
258
  void Skip(ColumnSegment &segment, idx_t skip_count) {
259
259
  auto data = handle.Ptr() + segment.GetBlockOffset();
260
- auto index_pointer = (rle_count_t *)(data + rle_count_offset);
260
+ auto index_pointer = reinterpret_cast<rle_count_t *>(data + rle_count_offset);
261
261
 
262
262
  for (idx_t i = 0; i < skip_count; i++) {
263
263
  // assign the current value
@@ -292,14 +292,58 @@ void RLESkip(ColumnSegment &segment, ColumnScanState &state, idx_t skip_count) {
292
292
  scan_state.Skip(segment, skip_count);
293
293
  }
294
294
 
295
+ static bool CanEmitConstantVector(idx_t position, idx_t run_length, idx_t scan_count) {
296
+ if (scan_count != STANDARD_VECTOR_SIZE) {
297
+ // Only when we can fill an entire Vector can we emit a ConstantVector, because subsequent scans require the
298
+ // input Vector to be flat
299
+ return false;
300
+ }
301
+ D_ASSERT(position < run_length);
302
+ auto remaining_in_run = run_length - position;
303
+ // The number of values left in this run is greater than or equal to the number of values we need to scan
304
+ return remaining_in_run >= scan_count;
305
+ }
306
+
307
+ template <class T>
308
+ inline static void ForwardToNextRun(RLEScanState<T> &scan_state) {
309
+ // handled all entries in this RLE value
310
+ // move to the next entry
311
+ scan_state.entry_pos++;
312
+ scan_state.position_in_entry = 0;
313
+ }
314
+
315
+ template <class T>
316
+ inline static bool ExhaustedRun(RLEScanState<T> &scan_state, rle_count_t *index_pointer) {
317
+ return scan_state.position_in_entry >= index_pointer[scan_state.entry_pos];
318
+ }
319
+
320
+ template <class T>
321
+ static void RLEScanConstant(RLEScanState<T> &scan_state, rle_count_t *index_pointer, T *data_pointer, idx_t scan_count,
322
+ Vector &result) {
323
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
324
+ auto result_data = ConstantVector::GetData<T>(result);
325
+ result_data[0] = data_pointer[scan_state.entry_pos];
326
+ scan_state.position_in_entry += scan_count;
327
+ if (ExhaustedRun(scan_state, index_pointer)) {
328
+ ForwardToNextRun(scan_state);
329
+ }
330
+ return;
331
+ }
332
+
295
333
  template <class T>
296
334
  void RLEScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result,
297
335
  idx_t result_offset) {
298
336
  auto &scan_state = state.scan_state->Cast<RLEScanState<T>>();
299
337
 
300
338
  auto data = scan_state.handle.Ptr() + segment.GetBlockOffset();
301
- auto data_pointer = (T *)(data + RLEConstants::RLE_HEADER_SIZE);
302
- auto index_pointer = (rle_count_t *)(data + scan_state.rle_count_offset);
339
+ auto data_pointer = reinterpret_cast<T *>(data + RLEConstants::RLE_HEADER_SIZE);
340
+ auto index_pointer = reinterpret_cast<rle_count_t *>(data + scan_state.rle_count_offset);
341
+
342
+ // If we are scanning an entire Vector and it contains only a single run
343
+ if (CanEmitConstantVector(scan_state.position_in_entry, index_pointer[scan_state.entry_pos], scan_count)) {
344
+ RLEScanConstant<T>(scan_state, index_pointer, data_pointer, scan_count, result);
345
+ return;
346
+ }
303
347
 
304
348
  auto result_data = FlatVector::GetData<T>(result);
305
349
  result.SetVectorType(VectorType::FLAT_VECTOR);
@@ -307,18 +351,14 @@ void RLEScanPartial(ColumnSegment &segment, ColumnScanState &state, idx_t scan_c
307
351
  // assign the current value
308
352
  result_data[result_offset + i] = data_pointer[scan_state.entry_pos];
309
353
  scan_state.position_in_entry++;
310
- if (scan_state.position_in_entry >= index_pointer[scan_state.entry_pos]) {
311
- // handled all entries in this RLE value
312
- // move to the next entry
313
- scan_state.entry_pos++;
314
- scan_state.position_in_entry = 0;
354
+ if (ExhaustedRun(scan_state, index_pointer)) {
355
+ ForwardToNextRun(scan_state);
315
356
  }
316
357
  }
317
358
  }
318
359
 
319
360
  template <class T>
320
361
  void RLEScan(ColumnSegment &segment, ColumnScanState &state, idx_t scan_count, Vector &result) {
321
- // FIXME: emit constant vector if repetition of single value is >= scan_count
322
362
  RLEScanPartial<T>(segment, state, scan_count, result, 0);
323
363
  }
324
364
 
@@ -331,7 +371,7 @@ void RLEFetchRow(ColumnSegment &segment, ColumnFetchState &state, row_t row_id,
331
371
  scan_state.Skip(segment, row_id);
332
372
 
333
373
  auto data = scan_state.handle.Ptr() + segment.GetBlockOffset();
334
- auto data_pointer = (T *)(data + RLEConstants::RLE_HEADER_SIZE);
374
+ auto data_pointer = reinterpret_cast<T *>(data + RLEConstants::RLE_HEADER_SIZE);
335
375
  auto result_data = FlatVector::GetData<T>(result);
336
376
  result_data[result_idx] = data_pointer[scan_state.entry_pos];
337
377
  }
@@ -0,0 +1,2 @@
1
+ #include "src/core_functions/scalar/debug/vector_type.cpp"
2
+
@@ -24,5 +24,7 @@
24
24
 
25
25
  #include "src/execution/reservoir_sample.cpp"
26
26
 
27
+ #include "src/execution/window_executor.cpp"
28
+
27
29
  #include "src/execution/window_segment_tree.cpp"
28
30
 
@@ -8,8 +8,6 @@
8
8
 
9
9
  #include "src/execution/index/art/leaf.cpp"
10
10
 
11
- #include "src/execution/index/art/leaf_segment.cpp"
12
-
13
11
  #include "src/execution/index/art/node4.cpp"
14
12
 
15
13
  #include "src/execution/index/art/node16.cpp"
@@ -18,8 +16,6 @@
18
16
 
19
17
  #include "src/execution/index/art/node256.cpp"
20
18
 
21
- #include "src/execution/index/art/swizzleable_pointer.cpp"
22
-
23
19
  #include "src/execution/index/art/prefix.cpp"
24
20
 
25
21
  #include "src/execution/index/art/art.cpp"
@@ -1,52 +0,0 @@
1
- #include "duckdb/execution/index/art/leaf_segment.hpp"
2
-
3
- #include "duckdb/execution/index/art/art.hpp"
4
- #include "duckdb/execution/index/art/node.hpp"
5
-
6
- namespace duckdb {
7
-
8
- LeafSegment &LeafSegment::New(ART &art, Node &node) {
9
-
10
- node.SetPtr(Node::GetAllocator(art, NType::LEAF_SEGMENT).New());
11
- node.type = (uint8_t)NType::LEAF_SEGMENT;
12
-
13
- auto &segment = LeafSegment::Get(art, node);
14
- segment.next.Reset();
15
- return segment;
16
- }
17
-
18
- void LeafSegment::Free(ART &art, Node &node) {
19
-
20
- D_ASSERT(node.IsSet());
21
- D_ASSERT(!node.IsSwizzled());
22
-
23
- // free next segment
24
- auto next_segment = LeafSegment::Get(art, node).next;
25
- Node::Free(art, next_segment);
26
- }
27
-
28
- LeafSegment &LeafSegment::Append(ART &art, uint32_t &count, const row_t row_id) {
29
-
30
- reference<LeafSegment> segment(*this);
31
- auto position = count % Node::LEAF_SEGMENT_SIZE;
32
-
33
- // we need a new segment
34
- if (position == 0 && count != 0) {
35
- segment = LeafSegment::New(art, next);
36
- }
37
-
38
- segment.get().row_ids[position] = row_id;
39
- count++;
40
- return segment.get();
41
- }
42
-
43
- LeafSegment &LeafSegment::GetTail(const ART &art) {
44
-
45
- reference<LeafSegment> segment(*this);
46
- while (segment.get().next.IsSet()) {
47
- segment = LeafSegment::Get(art, segment.get().next);
48
- }
49
- return segment.get();
50
- }
51
-
52
- } // namespace duckdb
@@ -1,22 +0,0 @@
1
- #include "duckdb/execution/index/art/swizzleable_pointer.hpp"
2
-
3
- #include "duckdb/storage/meta_block_reader.hpp"
4
-
5
- namespace duckdb {
6
-
7
- SwizzleablePointer::SwizzleablePointer(MetaBlockReader &reader) {
8
-
9
- idx_t block_id = reader.Read<block_id_t>();
10
- offset = reader.Read<uint32_t>();
11
- type = 0;
12
-
13
- if (block_id == DConstants::INVALID_INDEX) {
14
- swizzle_flag = 0;
15
- return;
16
- }
17
-
18
- buffer_id = (uint32_t)block_id;
19
- swizzle_flag = 1;
20
- }
21
-
22
- } // namespace duckdb
@@ -1,38 +0,0 @@
1
- //===----------------------------------------------------------------------===//
2
- // DuckDB
3
- //
4
- // duckdb/execution/index/art/leaf_segment.hpp
5
- //
6
- //
7
- //===----------------------------------------------------------------------===//
8
- #pragma once
9
-
10
- #include "duckdb/execution/index/art/art.hpp"
11
- #include "duckdb/execution/index/art/node.hpp"
12
-
13
- namespace duckdb {
14
-
15
- class LeafSegment {
16
- public:
17
- //! The row IDs stored in this segment
18
- row_t row_ids[Node::LEAF_SEGMENT_SIZE];
19
- //! The pointer to the next segment, used if the row IDs exceed this segment
20
- Node next;
21
-
22
- public:
23
- //! Get a new leaf segment node, might cause a new buffer allocation, and initialize it
24
- static LeafSegment &New(ART &art, Node &node);
25
- //! Get a reference to the leaf segment
26
- static inline LeafSegment &Get(const ART &art, const Node ptr) {
27
- return *Node::GetAllocator(art, NType::LEAF_SEGMENT).Get<LeafSegment>(ptr);
28
- }
29
- //! Free the leaf segment and any subsequent ones
30
- static void Free(ART &art, Node &node);
31
-
32
- //! Append a row ID to the current segment, or create a new segment containing that row ID
33
- LeafSegment &Append(ART &art, uint32_t &count, const row_t row_id);
34
- //! Get the tail of a list of segments
35
- LeafSegment &GetTail(const ART &art);
36
- };
37
-
38
- } // namespace duckdb
@@ -1,58 +0,0 @@
1
- //===----------------------------------------------------------------------===//
2
- // DuckDB
3
- //
4
- // duckdb/execution/index/art/swizzleable_pointer.hpp
5
- //
6
- //
7
- //===----------------------------------------------------------------------===//
8
- #pragma once
9
-
10
- #include "duckdb/common/constants.hpp"
11
-
12
- namespace duckdb {
13
-
14
- // classes
15
- class MetaBlockReader;
16
-
17
- // structs
18
- struct BlockPointer;
19
-
20
- //! SwizzleablePointer provides functions on a (possibly) swizzled pointer. If the swizzle flag is set, then the
21
- //! pointer points to a storage address (and has no type), otherwise the pointer has a type and stores
22
- //! other information (e.g., a buffer location)
23
- class SwizzleablePointer {
24
- public:
25
- //! Constructs an empty SwizzleablePointer
26
- SwizzleablePointer() : swizzle_flag(0), type(0), offset(0), buffer_id(0) {};
27
- //! Constructs a pointer by reading a block ID and an offset from the reader; the swizzle flag is set unless the block ID is invalid
28
- explicit SwizzleablePointer(MetaBlockReader &reader);
29
- //! Constructs a non-swizzled pointer from a buffer ID and an offset
30
- SwizzleablePointer(uint32_t offset, uint32_t buffer_id)
31
- : swizzle_flag(0), type(0), offset(offset), buffer_id(buffer_id) {};
32
-
33
- //! The swizzle flag, set if swizzled, not set otherwise
34
- uint8_t swizzle_flag : 1;
35
- //! The type of the pointer, zero if not set
36
- uint8_t type : 7;
37
- //! The offset of a memory location
38
- uint32_t offset : 24;
39
- //! The buffer ID of a memory location
40
- uint32_t buffer_id : 32;
41
-
42
- public:
43
- //! Checks if the pointer is swizzled
44
- inline bool IsSwizzled() const {
45
- return swizzle_flag;
46
- }
47
- //! Returns true if either the swizzle flag or the type is set, and false otherwise
48
- inline bool IsSet() const {
49
- return swizzle_flag || type;
50
- }
51
- //! Reset the pointer
52
- inline void Reset() {
53
- swizzle_flag = 0;
54
- type = 0;
55
- }
56
- };
57
-
58
- } // namespace duckdb