duckdb 1.4.2-dev4.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/package.json +2 -2
  2. package/src/duckdb/extension/icu/icu_extension.cpp +67 -6
  3. package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +9 -3
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +12 -0
  5. package/src/duckdb/extension/json/json_functions/json_create.cpp +10 -10
  6. package/src/duckdb/extension/parquet/decoder/delta_length_byte_array_decoder.cpp +19 -5
  7. package/src/duckdb/extension/parquet/include/decoder/delta_length_byte_array_decoder.hpp +1 -1
  8. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +11 -2
  9. package/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp +2 -1
  10. package/src/duckdb/extension/parquet/parquet_reader.cpp +3 -1
  11. package/src/duckdb/extension/parquet/parquet_writer.cpp +16 -1
  12. package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
  13. package/src/duckdb/extension/parquet/writer/primitive_column_writer.cpp +1 -1
  14. package/src/duckdb/src/catalog/default/default_table_functions.cpp +1 -1
  15. package/src/duckdb/src/common/adbc/adbc.cpp +8 -6
  16. package/src/duckdb/src/common/csv_writer.cpp +1 -13
  17. package/src/duckdb/src/common/encryption_key_manager.cpp +10 -9
  18. package/src/duckdb/src/common/enum_util.cpp +19 -0
  19. package/src/duckdb/src/common/enums/compression_type.cpp +51 -16
  20. package/src/duckdb/src/common/exception/binder_exception.cpp +7 -2
  21. package/src/duckdb/src/common/progress_bar/unscented_kalman_filter.cpp +2 -2
  22. package/src/duckdb/src/common/random_engine.cpp +10 -0
  23. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +13 -2
  24. package/src/duckdb/src/execution/index/art/art.cpp +6 -3
  25. package/src/duckdb/src/execution/index/bound_index.cpp +32 -21
  26. package/src/duckdb/src/execution/index/unbound_index.cpp +20 -9
  27. package/src/duckdb/src/execution/join_hashtable.cpp +9 -3
  28. package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +1 -1
  29. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
  30. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +5 -0
  31. package/src/duckdb/src/function/cast/cast_function_set.cpp +3 -1
  32. package/src/duckdb/src/function/macro_function.cpp +1 -1
  33. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -1
  34. package/src/duckdb/src/function/scalar/create_sort_key.cpp +5 -3
  35. package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +1 -1
  36. package/src/duckdb/src/function/scalar/system/parse_log_message.cpp +4 -2
  37. package/src/duckdb/src/function/table/copy_csv.cpp +28 -4
  38. package/src/duckdb/src/function/table/direct_file_reader.cpp +10 -0
  39. package/src/duckdb/src/function/table/read_file.cpp +65 -1
  40. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  41. package/src/duckdb/src/include/duckdb/common/csv_writer.hpp +0 -3
  42. package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +2 -0
  43. package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +5 -0
  44. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  45. package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +42 -2
  46. package/src/duckdb/src/include/duckdb/common/http_util.hpp +7 -0
  47. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +0 -11
  49. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +2 -0
  50. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +1 -0
  51. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +6 -6
  52. package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +115 -97
  53. package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +54 -0
  54. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +21 -2
  55. package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +26 -8
  56. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +2 -0
  57. package/src/duckdb/src/include/duckdb/function/table/read_file.hpp +0 -49
  58. package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +1 -1
  59. package/src/duckdb/src/include/duckdb/logging/log_type.hpp +14 -0
  60. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +2 -1
  61. package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  65. package/src/duckdb/src/include/duckdb/main/database.hpp +2 -2
  66. package/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +10 -6
  67. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +4 -0
  68. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +1 -0
  69. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +1 -0
  70. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -0
  71. package/src/duckdb/src/include/duckdb/main/relation/insert_relation.hpp +2 -0
  72. package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/main/relation.hpp +10 -2
  74. package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
  75. package/src/duckdb/src/include/duckdb/optimizer/filter_pullup.hpp +10 -14
  76. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +5 -1
  77. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +3 -0
  78. package/src/duckdb/src/include/duckdb/planner/bound_statement.hpp +1 -0
  79. package/src/duckdb/src/include/duckdb/storage/block.hpp +9 -0
  80. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +9 -2
  81. package/src/duckdb/src/include/duckdb/storage/index.hpp +8 -2
  82. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  83. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +0 -7
  85. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -2
  86. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +6 -0
  87. package/src/duckdb/src/logging/log_manager.cpp +2 -1
  88. package/src/duckdb/src/logging/log_types.cpp +30 -1
  89. package/src/duckdb/src/main/attached_database.cpp +4 -7
  90. package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +2 -3
  91. package/src/duckdb/src/main/buffered_data/buffered_data.cpp +2 -3
  92. package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +1 -2
  93. package/src/duckdb/src/main/capi/prepared-c.cpp +9 -2
  94. package/src/duckdb/src/main/config.cpp +6 -5
  95. package/src/duckdb/src/main/database.cpp +9 -3
  96. package/src/duckdb/src/main/database_file_path_manager.cpp +43 -14
  97. package/src/duckdb/src/main/database_manager.cpp +1 -1
  98. package/src/duckdb/src/main/http/http_util.cpp +19 -1
  99. package/src/duckdb/src/main/profiling_info.cpp +11 -0
  100. package/src/duckdb/src/main/query_profiler.cpp +16 -0
  101. package/src/duckdb/src/main/relation/create_table_relation.cpp +9 -0
  102. package/src/duckdb/src/main/relation/insert_relation.cpp +7 -0
  103. package/src/duckdb/src/main/relation/table_relation.cpp +14 -0
  104. package/src/duckdb/src/main/relation.cpp +28 -12
  105. package/src/duckdb/src/main/settings/custom_settings.cpp +9 -3
  106. package/src/duckdb/src/optimizer/filter_pullup.cpp +14 -0
  107. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +29 -10
  108. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +7 -0
  109. package/src/duckdb/src/parallel/task_executor.cpp +4 -2
  110. package/src/duckdb/src/parser/query_node/cte_node.cpp +79 -0
  111. package/src/duckdb/src/parser/transform/expression/transform_cast.cpp +3 -1
  112. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +1 -0
  113. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +12 -4
  114. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +16 -12
  115. package/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp +42 -5
  116. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +0 -24
  117. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +1 -1
  118. package/src/duckdb/src/planner/binder.cpp +0 -1
  119. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +1 -2
  120. package/src/duckdb/src/storage/buffer/block_manager.cpp +20 -6
  121. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +8 -6
  122. package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -22
  123. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +7 -0
  124. package/src/duckdb/src/storage/compression/zstd.cpp +34 -12
  125. package/src/duckdb/src/storage/data_table.cpp +1 -1
  126. package/src/duckdb/src/storage/local_storage.cpp +15 -2
  127. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -6
  128. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +11 -15
  129. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +1 -1
  130. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +1 -19
  131. package/src/duckdb/src/storage/single_file_block_manager.cpp +33 -3
  132. package/src/duckdb/src/storage/standard_buffer_manager.cpp +3 -1
  133. package/src/duckdb/src/storage/storage_info.cpp +4 -0
  134. package/src/duckdb/src/storage/storage_manager.cpp +8 -0
  135. package/src/duckdb/src/storage/table/array_column_data.cpp +1 -1
  136. package/src/duckdb/src/storage/table/column_data.cpp +3 -2
  137. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -2
  138. package/src/duckdb/src/storage/table/row_group.cpp +41 -24
  139. package/src/duckdb/src/storage/table/row_group_collection.cpp +114 -11
  140. package/src/duckdb/src/storage/table_index_list.cpp +18 -5
  141. package/src/duckdb/src/transaction/cleanup_state.cpp +7 -2
  142. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +5 -0
  143. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +8 -21
  144. package/src/duckdb/third_party/parquet/parquet_types.cpp +57 -35
  145. package/src/duckdb/third_party/parquet/parquet_types.h +9 -2
  146. package/src/duckdb/ub_src_common_types_row.cpp +0 -2
@@ -66,6 +66,8 @@ public:
66
66
  static void KeyDerivationFunctionSHA256(data_ptr_t user_key, idx_t user_key_size, data_ptr_t salt,
67
67
  data_ptr_t derived_key);
68
68
  static string Base64Decode(const string &key);
69
+
70
+ //! Generate a (non-cryptographically secure) random key ID
69
71
  static string GenerateRandomKeyID();
70
72
 
71
73
  public:
@@ -59,6 +59,11 @@ public:
59
59
 
60
60
  virtual ~EncryptionUtil() {
61
61
  }
62
+
63
+ //! Whether the EncryptionUtil supports encryption (some may only support decryption)
64
+ DUCKDB_API virtual bool SupportsEncryption() {
65
+ return true;
66
+ }
62
67
  };
63
68
 
64
69
  } // namespace duckdb
@@ -86,6 +86,8 @@ enum class BlockIteratorStateType : int8_t;
86
86
 
87
87
  enum class BlockState : uint8_t;
88
88
 
89
+ enum class BufferedIndexReplay : uint8_t;
90
+
89
91
  enum class CAPIResultSetType : uint8_t;
90
92
 
91
93
  enum class CSVState : uint8_t;
@@ -528,6 +530,9 @@ const char* EnumUtil::ToChars<BlockIteratorStateType>(BlockIteratorStateType val
528
530
  template<>
529
531
  const char* EnumUtil::ToChars<BlockState>(BlockState value);
530
532
 
533
+ template<>
534
+ const char* EnumUtil::ToChars<BufferedIndexReplay>(BufferedIndexReplay value);
535
+
531
536
  template<>
532
537
  const char* EnumUtil::ToChars<CAPIResultSetType>(CAPIResultSetType value);
533
538
 
@@ -1150,6 +1155,9 @@ BlockIteratorStateType EnumUtil::FromString<BlockIteratorStateType>(const char *
1150
1155
  template<>
1151
1156
  BlockState EnumUtil::FromString<BlockState>(const char *value);
1152
1157
 
1158
+ template<>
1159
+ BufferedIndexReplay EnumUtil::FromString<BufferedIndexReplay>(const char *value);
1160
+
1153
1161
  template<>
1154
1162
  CAPIResultSetType EnumUtil::FromString<CAPIResultSetType>(const char *value);
1155
1163
 
@@ -36,8 +36,48 @@ enum class CompressionType : uint8_t {
36
36
  COMPRESSION_COUNT // This has to stay the last entry of the type!
37
37
  };
38
38
 
39
- bool CompressionTypeIsDeprecated(CompressionType compression_type,
40
- optional_ptr<StorageManager> storage_manager = nullptr);
39
+ struct CompressionAvailabilityResult {
40
+ private:
41
+ enum class UnavailableReason : uint8_t {
42
+ AVAILABLE,
43
+ //! Introduced later, not available to this version
44
+ NOT_AVAILABLE_YET,
45
+ //! Used to be available, but isnt anymore
46
+ DEPRECATED
47
+ };
48
+
49
+ public:
50
+ CompressionAvailabilityResult() = default;
51
+ static CompressionAvailabilityResult Deprecated() {
52
+ return CompressionAvailabilityResult(UnavailableReason::DEPRECATED);
53
+ }
54
+ static CompressionAvailabilityResult NotAvailableYet() {
55
+ return CompressionAvailabilityResult(UnavailableReason::NOT_AVAILABLE_YET);
56
+ }
57
+
58
+ public:
59
+ bool IsAvailable() const {
60
+ return reason == UnavailableReason::AVAILABLE;
61
+ }
62
+ bool IsDeprecated() {
63
+ D_ASSERT(!IsAvailable());
64
+ return reason == UnavailableReason::DEPRECATED;
65
+ }
66
+ bool IsNotAvailableYet() {
67
+ D_ASSERT(!IsAvailable());
68
+ return reason == UnavailableReason::NOT_AVAILABLE_YET;
69
+ }
70
+
71
+ private:
72
+ explicit CompressionAvailabilityResult(UnavailableReason reason) : reason(reason) {
73
+ }
74
+
75
+ public:
76
+ UnavailableReason reason = UnavailableReason::AVAILABLE;
77
+ };
78
+
79
+ CompressionAvailabilityResult CompressionTypeIsAvailable(CompressionType compression_type,
80
+ optional_ptr<StorageManager> storage_manager = nullptr);
41
81
  vector<string> ListCompressionTypes(void);
42
82
  CompressionType CompressionTypeFromString(const string &str);
43
83
  string CompressionTypeToString(CompressionType type);
@@ -11,6 +11,7 @@
11
11
  #include "duckdb/common/types.hpp"
12
12
  #include "duckdb/common/case_insensitive_map.hpp"
13
13
  #include "duckdb/common/enums/http_status_code.hpp"
14
+ #include "duckdb/common/types/timestamp.hpp"
14
15
  #include <functional>
15
16
 
16
17
  namespace duckdb {
@@ -143,6 +144,11 @@ struct BaseRequest {
143
144
  //! Whether or not to return failed requests (instead of throwing)
144
145
  bool try_request = false;
145
146
 
147
+ // Requests will optionally contain their timings
148
+ bool have_request_timing = false;
149
+ timestamp_t request_start;
150
+ timestamp_t request_end;
151
+
146
152
  template <class TARGET>
147
153
  TARGET &Cast() {
148
154
  return reinterpret_cast<TARGET &>(*this);
@@ -210,6 +216,7 @@ struct PostRequestInfo : public BaseRequest {
210
216
  class HTTPClient {
211
217
  public:
212
218
  virtual ~HTTPClient() = default;
219
+ virtual void Initialize(HTTPParams &http_params) = 0;
213
220
 
214
221
  virtual unique_ptr<HTTPResponse> Get(GetRequestInfo &info) = 0;
215
222
  virtual unique_ptr<HTTPResponse> Put(PutRequestInfo &info) = 0;
@@ -76,7 +76,7 @@ public:
76
76
  DUCKDB_API explicit operator int16_t() const;
77
77
  DUCKDB_API explicit operator int32_t() const;
78
78
  DUCKDB_API explicit operator int64_t() const;
79
- DUCKDB_API operator uhugeint_t() const; // NOLINT: Allow implicit conversion from `hugeint_t`
79
+ DUCKDB_API explicit operator uhugeint_t() const;
80
80
  };
81
81
 
82
82
  } // namespace duckdb
@@ -210,15 +210,4 @@ inline bool GreaterThan::Operation(const interval_t &left, const interval_t &rig
210
210
  return Interval::GreaterThan(left, right);
211
211
  }
212
212
 
213
- //===--------------------------------------------------------------------===//
214
- // Specialized Hugeint Comparison Operators
215
- //===--------------------------------------------------------------------===//
216
- template <>
217
- inline bool Equals::Operation(const hugeint_t &left, const hugeint_t &right) {
218
- return Hugeint::Equals(left, right);
219
- }
220
- template <>
221
- inline bool GreaterThan::Operation(const hugeint_t &left, const hugeint_t &right) {
222
- return Hugeint::GreaterThan(left, right);
223
- }
224
213
  } // namespace duckdb
@@ -38,6 +38,8 @@ public:
38
38
 
39
39
  void SetSeed(uint64_t seed);
40
40
 
41
+ void RandomData(duckdb::data_ptr_t data, duckdb::idx_t len);
42
+
41
43
  static RandomEngine &Get(ClientContext &context);
42
44
 
43
45
  mutex lock;
@@ -26,6 +26,7 @@ applications, and to alter it and redistribute it freely, subject to the followi
26
26
  #include "duckdb/common/helper.hpp"
27
27
  #include "duckdb/common/types.hpp"
28
28
  #include "duckdb/common/unique_ptr.hpp"
29
+ #include "duckdb/common/operator/numeric_cast.hpp"
29
30
 
30
31
  #include <algorithm>
31
32
  #include <cstddef>
@@ -129,38 +129,38 @@ public:
129
129
  static int Sign(hugeint_t n);
130
130
  static hugeint_t Abs(hugeint_t n);
131
131
  // comparison operators
132
- static bool Equals(hugeint_t lhs, hugeint_t rhs) {
132
+ static bool Equals(const hugeint_t &lhs, const hugeint_t &rhs) {
133
133
  bool lower_equals = lhs.lower == rhs.lower;
134
134
  bool upper_equals = lhs.upper == rhs.upper;
135
135
  return lower_equals && upper_equals;
136
136
  }
137
137
 
138
- static bool NotEquals(hugeint_t lhs, hugeint_t rhs) {
138
+ static bool NotEquals(const hugeint_t &lhs, const hugeint_t &rhs) {
139
139
  return !Equals(lhs, rhs);
140
140
  }
141
141
 
142
- static bool GreaterThan(hugeint_t lhs, hugeint_t rhs) {
142
+ static bool GreaterThan(const hugeint_t &lhs, const hugeint_t &rhs) {
143
143
  bool upper_bigger = lhs.upper > rhs.upper;
144
144
  bool upper_equal = lhs.upper == rhs.upper;
145
145
  bool lower_bigger = lhs.lower > rhs.lower;
146
146
  return upper_bigger || (upper_equal && lower_bigger);
147
147
  }
148
148
 
149
- static bool GreaterThanEquals(hugeint_t lhs, hugeint_t rhs) {
149
+ static bool GreaterThanEquals(const hugeint_t &lhs, const hugeint_t &rhs) {
150
150
  bool upper_bigger = lhs.upper > rhs.upper;
151
151
  bool upper_equal = lhs.upper == rhs.upper;
152
152
  bool lower_bigger_equals = lhs.lower >= rhs.lower;
153
153
  return upper_bigger || (upper_equal && lower_bigger_equals);
154
154
  }
155
155
 
156
- static bool LessThan(hugeint_t lhs, hugeint_t rhs) {
156
+ static bool LessThan(const hugeint_t &lhs, const hugeint_t &rhs) {
157
157
  bool upper_smaller = lhs.upper < rhs.upper;
158
158
  bool upper_equal = lhs.upper == rhs.upper;
159
159
  bool lower_smaller = lhs.lower < rhs.lower;
160
160
  return upper_smaller || (upper_equal && lower_smaller);
161
161
  }
162
162
 
163
- static bool LessThanEquals(hugeint_t lhs, hugeint_t rhs) {
163
+ static bool LessThanEquals(const hugeint_t &lhs, const hugeint_t &rhs) {
164
164
  bool upper_smaller = lhs.upper < rhs.upper;
165
165
  bool upper_equal = lhs.upper == rhs.upper;
166
166
  bool lower_smaller_equals = lhs.lower <= rhs.lower;
@@ -23,64 +23,98 @@ enum class BlockIteratorStateType : int8_t {
23
23
  EXTERNAL,
24
24
  };
25
25
 
26
- BlockIteratorStateType GetBlockIteratorStateType(const bool &external);
26
+ static BlockIteratorStateType GetBlockIteratorStateType(const bool &external) {
27
+ return external ? BlockIteratorStateType::EXTERNAL : BlockIteratorStateType::IN_MEMORY;
28
+ }
27
29
 
28
- //! State for iterating over blocks of an in-memory TupleDataCollection
29
- //! Multiple iterators can share the same state, everything is const
30
- class InMemoryBlockIteratorState {
31
- public:
32
- explicit InMemoryBlockIteratorState(const TupleDataCollection &key_data);
33
-
34
- public:
35
- template <class T>
36
- T &GetValueAtIndex(const idx_t &block_idx, const idx_t &tuple_idx) const {
37
- D_ASSERT(GetIndex(block_idx, tuple_idx) < tuple_count);
38
- return reinterpret_cast<T *const>(block_ptrs[block_idx])[tuple_idx];
30
+ template <class BLOCK_ITERATOR_STATE>
31
+ class BlockIteratorStateBase {
32
+ protected:
33
+ friend BLOCK_ITERATOR_STATE;
34
+ explicit BlockIteratorStateBase(const idx_t tuple_count_p) : tuple_count(tuple_count_p) {
39
35
  }
40
36
 
41
- template <class T>
42
- T &GetValueAtIndex(const idx_t &n) const {
43
- const auto quotient = fast_mod.Div(n);
44
- return GetValueAtIndex<T>(quotient, fast_mod.Mod(n, quotient));
37
+ public:
38
+ idx_t GetDivisor() const {
39
+ const auto &state = static_cast<const BLOCK_ITERATOR_STATE &>(*this);
40
+ return state.GetDivisor();
45
41
  }
46
42
 
47
- void RandomAccess(idx_t &block_idx, idx_t &tuple_idx, const idx_t &index) const {
48
- block_idx = fast_mod.Div(index);
49
- tuple_idx = fast_mod.Mod(index, block_idx);
43
+ void RandomAccess(idx_t &block_or_chunk_idx, idx_t &tuple_idx, const idx_t &index) const {
44
+ const auto &state = static_cast<const BLOCK_ITERATOR_STATE &>(*this);
45
+ state.RandomAccessInternal(block_or_chunk_idx, tuple_idx, index);
50
46
  }
51
47
 
52
- void Add(idx_t &block_idx, idx_t &tuple_idx, const idx_t &value) const {
48
+ void Add(idx_t &block_or_chunk_idx, idx_t &tuple_idx, const idx_t &value) const {
53
49
  tuple_idx += value;
54
- if (tuple_idx >= fast_mod.GetDivisor()) {
55
- const auto div = fast_mod.Div(tuple_idx);
56
- tuple_idx -= div * fast_mod.GetDivisor();
57
- block_idx += div;
50
+ if (tuple_idx >= GetDivisor()) {
51
+ RandomAccess(block_or_chunk_idx, tuple_idx, GetIndex(block_or_chunk_idx, tuple_idx));
58
52
  }
59
53
  }
60
54
 
61
- void Subtract(idx_t &block_idx, idx_t &tuple_idx, const idx_t &value) const {
55
+ void Subtract(idx_t &block_or_chunk_idx, idx_t &tuple_idx, const idx_t &value) const {
62
56
  tuple_idx -= value;
63
- if (tuple_idx >= fast_mod.GetDivisor()) {
64
- const auto div = fast_mod.Div(-tuple_idx);
65
- tuple_idx += (div + 1) * fast_mod.GetDivisor();
66
- block_idx -= div + 1;
57
+ if (tuple_idx >= GetDivisor()) {
58
+ RandomAccess(block_or_chunk_idx, tuple_idx, GetIndex(block_or_chunk_idx, tuple_idx));
67
59
  }
68
60
  }
69
61
 
70
- void Increment(idx_t &block_idx, idx_t &tuple_idx) const {
71
- const auto passed_boundary = ++tuple_idx == fast_mod.GetDivisor();
72
- block_idx += passed_boundary;
73
- tuple_idx *= !passed_boundary;
62
+ void Increment(idx_t &block_or_chunk_idx, idx_t &tuple_idx) const {
63
+ const auto crossed_boundary = ++tuple_idx == GetDivisor();
64
+ block_or_chunk_idx += crossed_boundary;
65
+ tuple_idx *= !crossed_boundary;
74
66
  }
75
67
 
76
- void Decrement(idx_t &block_idx, idx_t &tuple_idx) const {
68
+ void Decrement(idx_t &block_or_chunk_idx, idx_t &tuple_idx) const {
77
69
  const auto crossed_boundary = tuple_idx-- == 0;
78
- block_idx -= crossed_boundary;
79
- tuple_idx += crossed_boundary * fast_mod.GetDivisor();
70
+ block_or_chunk_idx -= crossed_boundary;
71
+ tuple_idx += crossed_boundary * GetDivisor();
72
+ }
73
+
74
+ idx_t GetIndex(const idx_t &block_or_chunk_idx, const idx_t &tuple_idx) const {
75
+ return block_or_chunk_idx * GetDivisor() + tuple_idx;
76
+ }
77
+
78
+ protected:
79
+ const idx_t tuple_count;
80
+ };
81
+
82
+ template <BlockIteratorStateType>
83
+ class BlockIteratorState;
84
+
85
+ //! State for iterating over blocks of an in-memory TupleDataCollection
86
+ //! Multiple iterators can share the same state, everything is const
87
+ template <>
88
+ class BlockIteratorState<BlockIteratorStateType::IN_MEMORY>
89
+ : public BlockIteratorStateBase<BlockIteratorState<BlockIteratorStateType::IN_MEMORY>> {
90
+ public:
91
+ explicit BlockIteratorState(const TupleDataCollection &key_data)
92
+ : BlockIteratorStateBase(key_data.Count()), block_ptrs(ConvertBlockPointers(key_data.GetRowBlockPointers())),
93
+ fast_mod(key_data.TuplesPerBlock()) {
94
+ }
95
+
96
+ public:
97
+ idx_t GetDivisor() const {
98
+ return fast_mod.GetDivisor();
99
+ }
100
+
101
+ void RandomAccessInternal(idx_t &block_idx, idx_t &tuple_idx, const idx_t &index) const {
102
+ block_idx = fast_mod.Div(index);
103
+ tuple_idx = fast_mod.Mod(index, block_idx);
104
+ }
105
+
106
+ template <class T>
107
+ T &GetValueAtIndex(const idx_t &block_idx, const idx_t &tuple_idx) const {
108
+ D_ASSERT(GetIndex(block_idx, tuple_idx) < tuple_count);
109
+ return reinterpret_cast<T *const>(block_ptrs[block_idx])[tuple_idx];
80
110
  }
81
111
 
82
- idx_t GetIndex(const idx_t &block_idx, const idx_t &tuple_idx) const {
83
- return block_idx * fast_mod.GetDivisor() + tuple_idx;
112
+ template <class T>
113
+ T &GetValueAtIndex(const idx_t &index) const {
114
+ idx_t block_idx;
115
+ idx_t tuple_idx;
116
+ RandomAccess(block_idx, tuple_idx, index);
117
+ return GetValueAtIndex<T>(block_idx, tuple_idx);
84
118
  }
85
119
 
86
120
  void SetKeepPinned(const bool &) {
@@ -92,72 +126,63 @@ public:
92
126
  }
93
127
 
94
128
  private:
95
- static unsafe_vector<data_ptr_t> ConvertBlockPointers(const vector<data_ptr_t> &block_ptrs);
129
+ static unsafe_vector<data_ptr_t> ConvertBlockPointers(const vector<data_ptr_t> &block_ptrs) {
130
+ unsafe_vector<data_ptr_t> converted_block_ptrs;
131
+ converted_block_ptrs.reserve(block_ptrs.size());
132
+ for (const auto &block_ptr : block_ptrs) {
133
+ converted_block_ptrs.emplace_back(block_ptr);
134
+ }
135
+ return converted_block_ptrs;
136
+ }
96
137
 
97
138
  private:
98
139
  const unsafe_vector<data_ptr_t> block_ptrs;
99
140
  const FastMod<idx_t> fast_mod;
100
- const idx_t tuple_count;
101
141
  };
102
142
 
143
+ using InMemoryBlockIteratorState = BlockIteratorState<BlockIteratorStateType::IN_MEMORY>;
144
+
103
145
  //! State for iterating over blocks of an external (larger-than-memory) TupleDataCollection
104
146
  //! This state cannot be shared by multiple iterators, it is stateful
105
- class ExternalBlockIteratorState {
147
+ template <>
148
+ class BlockIteratorState<BlockIteratorStateType::EXTERNAL>
149
+ : public BlockIteratorStateBase<BlockIteratorState<BlockIteratorStateType::EXTERNAL>> {
106
150
  public:
107
- explicit ExternalBlockIteratorState(TupleDataCollection &key_data, optional_ptr<TupleDataCollection> payload_data);
108
-
109
- public:
110
- template <class T>
111
- T &GetValueAtIndex(const idx_t &chunk_idx, const idx_t &tuple_idx) {
112
- if (chunk_idx != current_chunk_idx) {
113
- InitializeChunk<T>(chunk_idx);
151
+ explicit BlockIteratorState(TupleDataCollection &key_data_p, optional_ptr<TupleDataCollection> payload_data_p)
152
+ : BlockIteratorStateBase(key_data_p.Count()), current_chunk_idx(DConstants::INVALID_INDEX),
153
+ key_data(key_data_p), key_ptrs(FlatVector::GetData<data_ptr_t>(key_scan_state.chunk_state.row_locations)),
154
+ payload_data(payload_data_p), keep_pinned(false), pin_payload(false) {
155
+ key_data.InitializeScan(key_scan_state);
156
+ if (payload_data) {
157
+ payload_data->InitializeScan(payload_scan_state);
114
158
  }
115
- return *reinterpret_cast<T **const>(key_ptrs)[tuple_idx];
116
159
  }
117
160
 
118
- template <class T>
119
- T &GetValueAtIndex(const idx_t &n) {
120
- D_ASSERT(n < tuple_count);
121
- return GetValueAtIndex<T>(n / STANDARD_VECTOR_SIZE, n % STANDARD_VECTOR_SIZE);
161
+ public:
162
+ static constexpr idx_t GetDivisor() {
163
+ return STANDARD_VECTOR_SIZE;
122
164
  }
123
165
 
124
- static void RandomAccess(idx_t &chunk_idx, idx_t &tuple_idx, const idx_t &index) {
166
+ static void RandomAccessInternal(idx_t &chunk_idx, idx_t &tuple_idx, const idx_t &index) {
125
167
  chunk_idx = index / STANDARD_VECTOR_SIZE;
126
168
  tuple_idx = index % STANDARD_VECTOR_SIZE;
127
169
  }
128
170
 
129
- static void Add(idx_t &chunk_idx, idx_t &tuple_idx, const idx_t &value) {
130
- tuple_idx += value;
131
- if (tuple_idx >= STANDARD_VECTOR_SIZE) {
132
- const auto div = tuple_idx / STANDARD_VECTOR_SIZE;
133
- tuple_idx -= div * STANDARD_VECTOR_SIZE;
134
- chunk_idx += div;
135
- }
136
- }
137
-
138
- static void Subtract(idx_t &chunk_idx, idx_t &tuple_idx, const idx_t &value) {
139
- tuple_idx -= value;
140
- if (tuple_idx >= STANDARD_VECTOR_SIZE) {
141
- const auto div = -tuple_idx / STANDARD_VECTOR_SIZE;
142
- tuple_idx += (div + 1) * STANDARD_VECTOR_SIZE;
143
- chunk_idx -= div + 1;
171
+ template <class T>
172
+ T &GetValueAtIndex(const idx_t &chunk_idx, const idx_t &tuple_idx) {
173
+ D_ASSERT(GetIndex(chunk_idx, tuple_idx) < tuple_count);
174
+ if (chunk_idx != current_chunk_idx) {
175
+ InitializeChunk<T>(chunk_idx);
144
176
  }
177
+ return *reinterpret_cast<T **const>(key_ptrs)[tuple_idx];
145
178
  }
146
179
 
147
- static void Increment(idx_t &chunk_idx, idx_t &tuple_idx) {
148
- const auto passed_boundary = ++tuple_idx == STANDARD_VECTOR_SIZE;
149
- chunk_idx += passed_boundary;
150
- tuple_idx *= !passed_boundary;
151
- }
152
-
153
- static void Decrement(idx_t &chunk_idx, idx_t &tuple_idx) {
154
- const auto crossed_boundary = tuple_idx-- == 0;
155
- chunk_idx -= crossed_boundary;
156
- tuple_idx += crossed_boundary * static_cast<idx_t>(STANDARD_VECTOR_SIZE);
157
- }
158
-
159
- static idx_t GetIndex(const idx_t &chunk_idx, const idx_t &tuple_idx) {
160
- return chunk_idx * STANDARD_VECTOR_SIZE + tuple_idx;
180
+ template <class T>
181
+ T &GetValueAtIndex(const idx_t &index) {
182
+ idx_t chunk_idx;
183
+ idx_t tuple_idx;
184
+ RandomAccess(chunk_idx, tuple_idx, index);
185
+ return GetValueAtIndex<T>(chunk_idx, tuple_idx);
161
186
  }
162
187
 
163
188
  void SetKeepPinned(const bool &enable) {
@@ -201,7 +226,6 @@ private:
201
226
  }
202
227
 
203
228
  private:
204
- const idx_t tuple_count;
205
229
  idx_t current_chunk_idx;
206
230
 
207
231
  TupleDataCollection &key_data;
@@ -216,13 +240,7 @@ private:
216
240
  vector<BufferHandle> pins;
217
241
  };
218
242
 
219
- //! Utility so we can get the state using the type
220
- template <BlockIteratorStateType T>
221
- using BlockIteratorState = typename std::conditional<
222
- T == BlockIteratorStateType::IN_MEMORY, InMemoryBlockIteratorState,
223
- typename std::conditional<T == BlockIteratorStateType::EXTERNAL, ExternalBlockIteratorState,
224
- void // Compiler throws error if we get here
225
- >::type>::type;
243
+ using ExternalBlockIteratorState = BlockIteratorState<BlockIteratorStateType::EXTERNAL>;
226
244
 
227
245
  //! Iterator for data spread out over multiple blocks
228
246
  template <class STATE, class T>
@@ -305,16 +323,16 @@ public:
305
323
  return *this;
306
324
  }
307
325
  block_iterator_t operator+(const difference_type &n) const {
308
- idx_t new_block_idx = block_or_chunk_idx;
326
+ idx_t new_block_or_chunk_idx = block_or_chunk_idx;
309
327
  idx_t new_tuple_idx = tuple_idx;
310
- state->Add(new_block_idx, new_tuple_idx, n);
311
- return block_iterator_t(*state, new_block_idx, new_tuple_idx);
328
+ state->Add(new_block_or_chunk_idx, new_tuple_idx, n);
329
+ return block_iterator_t(*state, new_block_or_chunk_idx, new_tuple_idx);
312
330
  }
313
331
  block_iterator_t operator-(const difference_type &n) const {
314
- idx_t new_block_idx = block_or_chunk_idx;
332
+ idx_t new_block_or_chunk_idx = block_or_chunk_idx;
315
333
  idx_t new_tuple_idx = tuple_idx;
316
- state->Subtract(new_block_idx, new_tuple_idx, n);
317
- return block_iterator_t(*state, new_block_idx, new_tuple_idx);
334
+ state->Subtract(new_block_or_chunk_idx, new_tuple_idx, n);
335
+ return block_iterator_t(*state, new_block_or_chunk_idx, new_tuple_idx);
318
336
  }
319
337
 
320
338
  reference operator[](const difference_type &n) const {
@@ -62,6 +62,60 @@ public:
62
62
  return nullptr;
63
63
  }
64
64
 
65
+ //! LookupInLeaf returns true if the rowid is in the leaf:
66
+ //! 1) If the leaf is an inlined leaf, check if the rowid matches.
67
+ //! 2) If the leaf is a gate node, perform a search in the nested ART for the rowid.
68
+ static bool LookupInLeaf(ART &art, const Node &node, const ARTKey &rowid) {
69
+ reference<const Node> ref(node);
70
+ idx_t depth = 0;
71
+
72
+ while (ref.get().HasMetadata()) {
73
+ const auto type = ref.get().GetType();
74
+ switch (type) {
75
+ case NType::LEAF_INLINED: {
76
+ return ref.get().GetRowId() == rowid.GetRowId();
77
+ }
78
+ case NType::LEAF: {
79
+ throw InternalException("Invalid node type (LEAF) for ARTOperator::NestedLookup.");
80
+ }
81
+ case NType::NODE_7_LEAF:
82
+ case NType::NODE_15_LEAF:
83
+ case NType::NODE_256_LEAF: {
84
+ D_ASSERT(depth + 1 == Prefix::ROW_ID_SIZE);
85
+ const auto byte = rowid[Prefix::ROW_ID_COUNT];
86
+ return ref.get().HasByte(art, byte);
87
+ }
88
+ case NType::NODE_4:
89
+ case NType::NODE_16:
90
+ case NType::NODE_48:
91
+ case NType::NODE_256: {
92
+ D_ASSERT(depth < Prefix::ROW_ID_SIZE);
93
+ auto child = ref.get().GetChild(art, rowid[depth]);
94
+ if (child) {
95
+ // Continue in the child.
96
+ ref = *child;
97
+ depth++;
98
+ D_ASSERT(ref.get().HasMetadata());
99
+ continue;
100
+ }
101
+ return false;
102
+ }
103
+ case NType::PREFIX: {
104
+ Prefix prefix(art, ref.get());
105
+ for (idx_t i = 0; i < prefix.data[Prefix::Count(art)]; i++) {
106
+ if (prefix.data[i] != rowid[depth]) {
107
+ // The key and the prefix don't match.
108
+ return false;
109
+ }
110
+ depth++;
111
+ }
112
+ ref = *prefix.ptr;
113
+ }
114
+ }
115
+ }
116
+ return false;
117
+ }
118
+
65
119
  //! Insert a key and its row ID into the node.
66
120
  //! Starts at depth (in the key).
67
121
  //! status indicates if the insert happens inside a gate or not.
@@ -8,6 +8,7 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include "duckdb/execution/index/unbound_index.hpp"
11
12
  #include "duckdb/common/enums/index_constraint_type.hpp"
12
13
  #include "duckdb/common/types/constraint_conflict_info.hpp"
13
14
  #include "duckdb/common/types/data_chunk.hpp"
@@ -60,6 +61,16 @@ public:
60
61
  //! The index constraint type
61
62
  IndexConstraintType index_constraint_type;
62
63
 
64
+ //! The vector of unbound expressions, which are later turned into bound expressions.
65
+ //! We need to store the unbound expressions, as we might not always have the context
66
+ //! available to bind directly.
67
+ //! The leaves of these unbound expressions are BoundColumnRefExpressions.
68
+ //! These BoundColumnRefExpressions contain a binding (ColumnBinding),
69
+ //! and that contains a table_index and a column_index.
70
+ //! The table_index is a dummy placeholder.
71
+ //! The column_index indexes the column_ids vector in the Index base class.
72
+ //! Those column_ids store the physical table indexes of the Index,
73
+ //! and we use them when binding the unbound expressions.
63
74
  vector<unique_ptr<Expression>> unbound_expressions;
64
75
 
65
76
  public:
@@ -155,14 +166,22 @@ public:
155
166
  virtual string GetConstraintViolationMessage(VerifyExistenceType verify_type, idx_t failed_index,
156
167
  DataChunk &input) = 0;
157
168
 
158
- void ApplyBufferedAppends(const vector<LogicalType> &table_types, ColumnDataCollection &buffered_appends,
169
+ //! Replay index insert and delete operations buffered during WAL replay.
170
+ //! table_types has the types of the table's physical columns in the order they appear (physical, not logical: no generated columns).
171
+ //! mapped_column_ids contains the sorted order of indexed physical column IDs (see unbound_index.hpp comments).
172
+ void ApplyBufferedReplays(const vector<LogicalType> &table_types, vector<BufferedIndexData> &buffered_replays,
159
173
  const vector<StorageIndex> &mapped_column_ids);
160
174
 
161
175
  protected:
162
176
  //! Lock used for any changes to the index
163
177
  mutex lock;
164
178
 
165
- //! Bound expressions used during expression execution
179
+ //! The vector of bound expressions to generate the Index keys based on a data chunk.
180
+ //! The leaves of the bound expressions are BoundReferenceExpressions.
181
+ //! These BoundReferenceExpressions contain offsets into the DataChunk to retrieve the columns
182
+ //! for the expression.
183
+ //! With these offsets into the DataChunk, the expression executor can now evaluate the expression
184
+ //! on incoming data chunks to generate the keys.
166
185
  vector<unique_ptr<Expression>> bound_expressions;
167
186
 
168
187
  private: