duckdb 0.7.2-dev2366.0 → 0.7.2-dev2430.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/package.json +1 -1
  2. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  3. package/src/duckdb/src/common/file_buffer.cpp +8 -0
  4. package/src/duckdb/src/common/radix_partitioning.cpp +34 -0
  5. package/src/duckdb/src/common/sort/partition_state.cpp +44 -124
  6. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  7. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +144 -31
  8. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +698 -0
  9. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -1
  10. package/src/duckdb/src/function/scalar/list/list_sort.cpp +30 -56
  11. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  12. package/src/duckdb/src/include/duckdb/common/enums/debug_initialize.hpp +17 -0
  13. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +8 -0
  14. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  15. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +3 -0
  16. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +3 -0
  17. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -60
  18. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +6 -1
  19. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +93 -0
  20. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +1 -1
  21. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  22. package/src/duckdb/src/include/duckdb/main/config.hpp +7 -2
  23. package/src/duckdb/src/include/duckdb/main/settings.hpp +13 -3
  24. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -2
  25. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +1 -0
  26. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +3 -0
  27. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -1
  28. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +11 -5
  29. package/src/duckdb/src/main/config.cpp +26 -0
  30. package/src/duckdb/src/main/settings/settings.cpp +31 -8
  31. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +2 -5
  32. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +6 -14
  33. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +2 -5
  34. package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -2
  35. package/src/duckdb/src/storage/meta_block_writer.cpp +4 -0
  36. package/src/duckdb/src/storage/partial_block_manager.cpp +11 -4
  37. package/src/duckdb/src/storage/single_file_block_manager.cpp +16 -9
  38. package/src/duckdb/src/storage/standard_buffer_manager.cpp +5 -2
  39. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  40. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +21 -1
  41. package/src/duckdb/ub_src_execution_operator_join.cpp +2 -0
@@ -1,4 +1,5 @@
1
1
  #include "duckdb/execution/operator/aggregate/physical_window.hpp"
2
+ #include "duckdb/execution/operator/join/physical_asof_join.hpp"
2
3
  #include "duckdb/execution/operator/join/physical_iejoin.hpp"
3
4
  #include "duckdb/execution/operator/projection/physical_projection.hpp"
4
5
  #include "duckdb/execution/physical_plan_generator.hpp"
@@ -39,7 +40,11 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalAsOfJoin &
39
40
  }
40
41
  D_ASSERT(asof_idx < op.conditions.size());
41
42
 
42
- // Temporary implementation: IEJoin of Window
43
+ if (!ClientConfig::GetConfig(context).force_asof_iejoin) {
44
+ return make_uniq<PhysicalAsOfJoin>(op, std::move(left), std::move(right));
45
+ }
46
+
47
+ // Debug implementation: IEJoin of Window
43
48
  // LEAD(asof_column, 1, infinity) OVER (PARTITION BY equi_column... ORDER BY asof_column) AS asof_temp
44
49
  auto &asof_comp = op.conditions[asof_idx];
45
50
  auto &asof_column = asof_comp.right;
@@ -47,6 +52,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalAsOfJoin &
47
52
  auto asof_temp = make_uniq<BoundWindowExpression>(ExpressionType::WINDOW_LEAD, asof_type, nullptr, nullptr);
48
53
  asof_temp->children.emplace_back(asof_column->Copy());
49
54
  asof_temp->offset_expr = make_uniq<BoundConstantExpression>(Value::BIGINT(1));
55
+ // TODO: If infinities are not supported for a type, fake them by looking at LHS statistics?
50
56
  asof_temp->default_expr = make_uniq<BoundConstantExpression>(Value::Infinity(asof_type));
51
57
  for (auto equi_idx : equi_indexes) {
52
58
  asof_temp->partitions.emplace_back(op.conditions[equi_idx].right->Copy());
@@ -244,83 +244,57 @@ static unique_ptr<FunctionData> ListSortBind(ClientContext &context, ScalarFunct
244
244
  return make_uniq<ListSortBindData>(order, null_order, bound_function.return_type, child_type, context);
245
245
  }
246
246
 
247
- static OrderByNullType GetNullOrder(ClientContext &context, vector<unique_ptr<Expression>> &arguments, idx_t idx) {
248
-
249
- if (!arguments[idx]->IsFoldable()) {
250
- throw InvalidInputException("Null sorting order must be a constant");
251
- }
252
- Value null_order_value = ExpressionExecutor::EvaluateScalar(context, *arguments[idx]);
253
- auto null_order_name = StringUtil::Upper(null_order_value.ToString());
254
- const auto null_order_arg = EnumSerializer::StringToEnum<OrderByNullType>(null_order_name.c_str());
255
- switch (null_order_arg) {
256
- case OrderByNullType::NULLS_FIRST:
257
- case OrderByNullType::NULLS_LAST:
258
- return null_order_arg;
259
- case OrderByNullType::ORDER_DEFAULT:
260
- return DBConfig::GetConfig(context).options.default_null_order;
261
- default:
262
- throw InvalidInputException("Null sorting order must be either NULLS FIRST, NULLS LAST or DEFAULT");
247
+ template <class T>
248
+ static T GetOrder(ClientContext &context, Expression &expr) {
249
+ if (!expr.IsFoldable()) {
250
+ throw InvalidInputException("Sorting order must be a constant");
263
251
  }
252
+ Value order_value = ExpressionExecutor::EvaluateScalar(context, expr);
253
+ auto order_name = StringUtil::Upper(order_value.ToString());
254
+ return EnumSerializer::StringToEnum<T>(order_name.c_str());
264
255
  }
265
256
 
266
257
  static unique_ptr<FunctionData> ListNormalSortBind(ClientContext &context, ScalarFunction &bound_function,
267
258
  vector<unique_ptr<Expression>> &arguments) {
268
-
269
- D_ASSERT(!bound_function.arguments.empty() && bound_function.arguments.size() <= 3);
270
259
  D_ASSERT(!arguments.empty() && arguments.size() <= 3);
271
-
272
- // set default values
273
- auto &config = DBConfig::GetConfig(context);
274
- auto order = config.options.default_order_type;
275
- auto null_order = config.options.default_null_order;
260
+ auto order = OrderType::ORDER_DEFAULT;
261
+ auto null_order = OrderByNullType::ORDER_DEFAULT;
276
262
 
277
263
  // get the sorting order
278
264
  if (arguments.size() >= 2) {
279
-
280
- if (!arguments[1]->IsFoldable()) {
281
- throw InvalidInputException("Sorting order must be a constant");
282
- }
283
- Value order_value = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
284
-
285
- const auto order_name = StringUtil::Upper(order_value.ToString());
286
- const auto order_arg = EnumSerializer::StringToEnum<OrderType>(order_name.c_str());
287
- switch (order_arg) {
288
- case OrderType::ASCENDING:
289
- case OrderType::DESCENDING:
290
- order = order_arg;
291
- break;
292
- case OrderType::ORDER_DEFAULT:
293
- break;
294
- default:
295
- throw InvalidInputException("Sorting order must be either ASC, DESC or DEFAULT");
296
- }
265
+ order = GetOrder<OrderType>(context, *arguments[1]);
297
266
  }
298
-
299
267
  // get the null sorting order
300
268
  if (arguments.size() == 3) {
301
- null_order = GetNullOrder(context, arguments, 2);
269
+ null_order = GetOrder<OrderByNullType>(context, *arguments[2]);
302
270
  }
303
-
271
+ auto &config = DBConfig::GetConfig(context);
272
+ order = config.ResolveOrder(order);
273
+ null_order = config.ResolveNullOrder(order, null_order);
304
274
  return ListSortBind(context, bound_function, arguments, order, null_order);
305
275
  }
306
276
 
307
277
  static unique_ptr<FunctionData> ListReverseSortBind(ClientContext &context, ScalarFunction &bound_function,
308
278
  vector<unique_ptr<Expression>> &arguments) {
279
+ auto order = OrderType::ORDER_DEFAULT;
280
+ auto null_order = OrderByNullType::ORDER_DEFAULT;
309
281
 
310
- D_ASSERT(bound_function.arguments.size() == 1 || bound_function.arguments.size() == 2);
311
- D_ASSERT(arguments.size() == 1 || arguments.size() == 2);
312
-
313
- // set (reverse) default values
314
- auto &config = DBConfig::GetConfig(context);
315
- auto order =
316
- (config.options.default_order_type == OrderType::ASCENDING) ? OrderType::DESCENDING : OrderType::ASCENDING;
317
- auto null_order = config.options.default_null_order;
318
-
319
- // get the null sorting order
320
282
  if (arguments.size() == 2) {
321
- null_order = GetNullOrder(context, arguments, 1);
283
+ null_order = GetOrder<OrderByNullType>(context, *arguments[1]);
322
284
  }
323
-
285
+ auto &config = DBConfig::GetConfig(context);
286
+ order = config.ResolveOrder(order);
287
+ switch (order) {
288
+ case OrderType::ASCENDING:
289
+ order = OrderType::DESCENDING;
290
+ break;
291
+ case OrderType::DESCENDING:
292
+ order = OrderType::ASCENDING;
293
+ break;
294
+ default:
295
+ throw InternalException("Unexpected order type in list reverse sort");
296
+ }
297
+ null_order = config.ResolveNullOrder(order, null_order);
324
298
  return ListSortBind(context, bound_function, arguments, order, null_order);
325
299
  }
326
300
 
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev2366"
2
+ #define DUCKDB_VERSION "0.7.2-dev2430"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "1d7b9c315e"
5
+ #define DUCKDB_SOURCE_ID "f429595834"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -0,0 +1,17 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/enums/debug_initialize.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/constants.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ enum class DebugInitialize : uint8_t { NO_INITIALIZE = 0, DEBUG_ZERO_INITIALIZE = 1, DEBUG_ONE_INITIALIZE = 2 };
16
+
17
+ } // namespace duckdb
@@ -17,4 +17,12 @@ enum class OrderType : uint8_t { INVALID = 0, ORDER_DEFAULT = 1, ASCENDING = 2,
17
17
 
18
18
  enum class OrderByNullType : uint8_t { INVALID = 0, ORDER_DEFAULT = 1, NULLS_FIRST = 2, NULLS_LAST = 3 };
19
19
 
20
+ enum class DefaultOrderByNullType : uint8_t {
21
+ INVALID = 0,
22
+ NULLS_FIRST = 2,
23
+ NULLS_LAST = 3,
24
+ NULLS_FIRST_ON_ASC_LAST_ON_DESC = 4,
25
+ NULLS_LAST_ON_ASC_FIRST_ON_DESC = 5
26
+ };
27
+
20
28
  } // namespace duckdb
@@ -58,6 +58,7 @@ enum class PhysicalOperatorType : uint8_t {
58
58
  DELIM_JOIN,
59
59
  INDEX_JOIN,
60
60
  POSITIONAL_JOIN,
61
+ ASOF_JOIN,
61
62
  // -----------------------------
62
63
  // SetOps
63
64
  // -----------------------------
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/constants.hpp"
12
+ #include "duckdb/common/enums/debug_initialize.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
  class Allocator;
@@ -62,6 +63,8 @@ public:
62
63
 
63
64
  MemoryRequirement CalculateMemory(uint64_t user_size);
64
65
 
66
+ void Initialize(DebugInitialize info);
67
+
65
68
  protected:
66
69
  //! The pointer to the internal buffer that will be read or written, including the buffer header
67
70
  data_ptr_t internal_buffer;
@@ -49,6 +49,9 @@ public:
49
49
  //! Select using a cutoff on the radix bits of the hash
50
50
  static idx_t Select(Vector &hashes, const SelectionVector *sel, idx_t count, idx_t radix_bits, idx_t cutoff,
51
51
  SelectionVector *true_sel, SelectionVector *false_sel);
52
+
53
+ //! Convert hashes to bins
54
+ static void HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count);
52
55
  };
53
56
 
54
57
  //! Templated radix partitioning constants, can be templated to the number of radix bits
@@ -25,6 +25,8 @@ public:
25
25
  PartitionGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
26
26
  const Types &payload_types, bool external);
27
27
 
28
+ int ComparePartitions(const SBIterator &left, const SBIterator &right) const;
29
+
28
30
  void ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask);
29
31
 
30
32
  GlobalSortStatePtr global_sort;
@@ -43,8 +45,12 @@ public:
43
45
  using GroupingPartition = unique_ptr<PartitionedColumnData>;
44
46
  using GroupingAppend = unique_ptr<PartitionedColumnDataAppendState>;
45
47
 
46
- PartitionGlobalSinkState(ClientContext &context, const vector<unique_ptr<Expression>> &partitions_p,
47
- const vector<BoundOrderByNode> &orders_p, const Types &payload_types,
48
+ static void GenerateOrderings(Orders &partitions, Orders &orders,
49
+ const vector<unique_ptr<Expression>> &partition_bys, const Orders &order_bys,
50
+ const vector<unique_ptr<BaseStatistics>> &partitions_stats);
51
+
52
+ PartitionGlobalSinkState(ClientContext &context, const vector<unique_ptr<Expression>> &partition_bys,
53
+ const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
48
54
  const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
49
55
 
50
56
  void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
@@ -68,6 +74,8 @@ public:
68
74
  const Types payload_types;
69
75
  vector<HashGroupPtr> hash_groups;
70
76
  bool external;
77
+ // Reverse lookup from hash bins to non-empty hash groups
78
+ vector<size_t> bin_groups;
71
79
 
72
80
  // OVER() (no sorting)
73
81
  unique_ptr<RowDataCollection> rows;
@@ -121,7 +129,7 @@ class PartitionGlobalMergeState {
121
129
  public:
122
130
  using GroupDataPtr = unique_ptr<ColumnDataCollection>;
123
131
 
124
- explicit PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data);
132
+ PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);
125
133
 
126
134
  bool IsSorted() const {
127
135
  lock_guard<mutex> guard(lock);
@@ -187,61 +195,4 @@ public:
187
195
  void Schedule() override;
188
196
  };
189
197
 
190
- class PartitionGlobalSourceState {
191
- public:
192
- explicit PartitionGlobalSourceState(PartitionGlobalSinkState &gsink_p) : gsink(gsink_p), next_bin(0) {
193
- }
194
-
195
- PartitionGlobalSinkState &gsink;
196
- //! The output read position.
197
- atomic<idx_t> next_bin;
198
-
199
- public:
200
- idx_t MaxThreads() {
201
- // If there is only one partition, we have to process it on one thread.
202
- if (!gsink.grouping_data) {
203
- return 1;
204
- }
205
-
206
- // If there is not a lot of data, process serially.
207
- if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
208
- return 1;
209
- }
210
-
211
- return gsink.hash_groups.size();
212
- }
213
- };
214
-
215
- // Per-thread read state
216
- class PartitionLocalSourceState {
217
- public:
218
- using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
219
-
220
- explicit PartitionLocalSourceState(PartitionGlobalSinkState &gstate_p);
221
-
222
- void MaterializeSortedData();
223
- idx_t GeneratePartition(const idx_t hash_bin);
224
-
225
- PartitionGlobalSinkState &gstate;
226
-
227
- //! The read partition
228
- idx_t hash_bin;
229
- HashGroupPtr hash_group;
230
-
231
- //! The generated input chunks
232
- unique_ptr<RowDataCollection> rows;
233
- unique_ptr<RowDataCollection> heap;
234
- RowLayout layout;
235
- //! The partition boundary mask
236
- vector<validity_t> partition_bits;
237
- ValidityMask partition_mask;
238
- //! The order boundary mask
239
- vector<validity_t> order_bits;
240
- ValidityMask order_mask;
241
- //! The read cursor
242
- unique_ptr<RowDataCollectionScanner> scanner;
243
- //! Buffer for the inputs
244
- DataChunk input_chunk;
245
- };
246
-
247
198
  } // namespace duckdb
@@ -29,7 +29,7 @@ struct OuterJoinLocalScanState {
29
29
 
30
30
  class OuterJoinMarker {
31
31
  public:
32
- OuterJoinMarker(bool enabled);
32
+ explicit OuterJoinMarker(bool enabled);
33
33
 
34
34
  bool Enabled() {
35
35
  return enabled;
@@ -60,6 +60,11 @@ public:
60
60
  //! Perform the scan
61
61
  void Scan(OuterJoinGlobalScanState &gstate, OuterJoinLocalScanState &lstate, DataChunk &result);
62
62
 
63
+ //! Read-only matches vector
64
+ const bool *GetMatches() const {
65
+ return found_match.get();
66
+ }
67
+
63
68
  private:
64
69
  bool enabled;
65
70
  unique_ptr<bool[]> found_match;
@@ -0,0 +1,93 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/operator/join/physical_asof_join.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/execution/operator/join/physical_comparison_join.hpp"
12
+ #include "duckdb/planner/bound_result_modifier.hpp"
13
+
14
+ namespace duckdb {
15
+
16
+ //! PhysicalAsOfJoin represents an AsOf join between two tables: the equality
17
+ //! conditions partition both inputs and the inequality condition orders them
18
+ class PhysicalAsOfJoin : public PhysicalComparisonJoin {
19
+ public:
20
+ static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::ASOF_JOIN;
21
+
22
+ public:
23
+ PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left, unique_ptr<PhysicalOperator> right);
24
+
25
+ vector<LogicalType> join_key_types;
26
+ vector<column_t> null_sensitive;
27
+
28
+ // Equalities
29
+ vector<unique_ptr<Expression>> lhs_partitions;
30
+ vector<unique_ptr<Expression>> rhs_partitions;
31
+
32
+ // Inequality Only
33
+ vector<BoundOrderByNode> lhs_orders;
34
+ vector<BoundOrderByNode> rhs_orders;
35
+
36
+ // Projection mappings
37
+ vector<column_t> right_projection_map;
38
+
39
+ public:
40
+ // Operator Interface
41
+ unique_ptr<GlobalOperatorState> GetGlobalOperatorState(ClientContext &context) const override;
42
+ unique_ptr<OperatorState> GetOperatorState(ExecutionContext &context) const override;
43
+
44
+ bool ParallelOperator() const override {
45
+ return true;
46
+ }
47
+
48
+ protected:
49
+ // CachingOperator Interface
50
+ OperatorResultType ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
51
+ GlobalOperatorState &gstate, OperatorState &state) const override;
52
+
53
+ public:
54
+ // Source interface
55
+ unique_ptr<LocalSourceState> GetLocalSourceState(ExecutionContext &context,
56
+ GlobalSourceState &gstate) const override;
57
+ unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
58
+ void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
59
+ LocalSourceState &lstate) const override;
60
+
61
+ bool IsSource() const override {
62
+ return IsRightOuterJoin(join_type);
63
+ }
64
+ bool ParallelSource() const override {
65
+ return true;
66
+ }
67
+
68
+ public:
69
+ // Sink Interface
70
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
71
+ unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
72
+ SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
73
+ DataChunk &input) const override;
74
+ void Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const override;
75
+ SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
76
+ GlobalSinkState &gstate) const override;
77
+
78
+ bool IsSink() const override {
79
+ return true;
80
+ }
81
+ bool ParallelSink() const override {
82
+ return true;
83
+ }
84
+
85
+ private:
86
+ // resolve joins that output max N elements (SEMI, ANTI, MARK)
87
+ void ResolveSimpleJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk, OperatorState &state) const;
88
+ // resolve joins that can potentially output N*M elements (INNER, LEFT, FULL)
89
+ OperatorResultType ResolveComplexJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
90
+ OperatorState &state) const;
91
+ };
92
+
93
+ } // namespace duckdb
@@ -199,7 +199,7 @@ public:
199
199
  ~CachingOperatorState() override {
200
200
  }
201
201
 
202
- virtual void Finalize(const PhysicalOperator &op, ExecutionContext &context) override {
202
+ void Finalize(const PhysicalOperator &op, ExecutionContext &context) override {
203
203
  }
204
204
 
205
205
  unique_ptr<DataChunk> cached_chunk;
@@ -71,6 +71,8 @@ struct ClientConfig {
71
71
  bool force_external = false;
72
72
  //! Force disable cross product generation when hyper graph isn't connected, used for testing
73
73
  bool force_no_cross_product = false;
74
+ //! Force use of IEJoin to implement AsOfJoin, used for testing
75
+ bool force_asof_iejoin = false;
74
76
  //! If this context should also try to use the available replacement scans
75
77
  //! True by default
76
78
  bool use_replacement_scans = true;
@@ -106,8 +106,8 @@ struct DBConfigOptions {
106
106
  string collation = string();
107
107
  //! The order type used when none is specified (default: ASC)
108
108
  OrderType default_order_type = OrderType::ASCENDING;
109
- //! Null ordering used when none is specified (default: NULLS FIRST)
110
- OrderByNullType default_null_order = OrderByNullType::NULLS_FIRST;
109
+ //! Null ordering used when none is specified (default: NULLS LAST)
110
+ DefaultOrderByNullType default_null_order = DefaultOrderByNullType::NULLS_LAST;
111
111
  //! enable COPY and related commands
112
112
  bool enable_external_access = true;
113
113
  //! Whether or not object cache is used
@@ -145,6 +145,8 @@ struct DBConfigOptions {
145
145
  bool enable_fsst_vectors = false;
146
146
  //! Start transactions immediately in all attached databases - instead of lazily when a database is referenced
147
147
  bool immediate_transaction_mode = false;
148
+ //! Debug setting - how to initialize blocks in the storage layer when allocating
149
+ DebugInitialize debug_initialize = DebugInitialize::NO_INITIALIZE;
148
150
  //! The set of unrecognized (other) options
149
151
  unordered_map<string, Value> unrecognized_options;
150
152
 
@@ -226,6 +228,9 @@ public:
226
228
  void SetDefaultMaxThreads();
227
229
  void SetDefaultMaxMemory();
228
230
 
231
+ OrderType ResolveOrder(OrderType order_type) const;
232
+ OrderByNullType ResolveNullOrder(OrderType order_type, OrderByNullType null_type) const;
233
+
229
234
  private:
230
235
  unique_ptr<CompressionFunctionSet> compression_functions;
231
236
  unique_ptr<CastFunctionSet> cast_functions;
@@ -66,9 +66,19 @@ struct DebugForceNoCrossProduct {
66
66
  };
67
67
 
68
68
  struct OrderedAggregateThreshold {
69
- static constexpr const char *Name = "ordered_aggregate_threshold";
70
- static constexpr const char *Description = "the number of rows to accumulate before sorting, used for tuning";
71
- static constexpr const LogicalTypeId InputType = LogicalTypeId::UBIGINT;
69
+ static constexpr const char *Name = "ordered_aggregate_threshold"; // NOLINT
70
+ static constexpr const char *Description = // NOLINT
71
+ "the number of rows to accumulate before sorting, used for tuning";
72
+ static constexpr const LogicalTypeId InputType = LogicalTypeId::UBIGINT; // NOLINT
73
+ static void SetLocal(ClientContext &context, const Value &parameter);
74
+ static void ResetLocal(ClientContext &context);
75
+ static Value GetSetting(ClientContext &context);
76
+ };
77
+
78
+ struct DebugAsOfIEJoin {
79
+ static constexpr const char *Name = "debug_asof_iejoin"; // NOLINT
80
+ static constexpr const char *Description = "DEBUG SETTING: force use of IEJoin to implement AsOf joins"; // NOLINT
81
+ static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN; // NOLINT
72
82
  static void SetLocal(ClientContext &context, const Value &parameter);
73
83
  static void ResetLocal(ClientContext &context);
74
84
  static Value GetSetting(ClientContext &context);
@@ -87,8 +87,10 @@ public:
87
87
  // Start with function call
88
88
  string result = schema.empty() ? function_name : schema + "." + function_name;
89
89
  result += "(";
90
- result += StringUtil::Join(entry.children, entry.children.size(), ", ",
91
- [](const unique_ptr<BASE> &child) { return child->ToString(); });
90
+ if (entry.children.size()) {
91
+ result += StringUtil::Join(entry.children, entry.children.size(), ", ",
92
+ [](const unique_ptr<BASE> &child) { return child->ToString(); });
93
+ }
92
94
  // Lead/Lag extra arguments
93
95
  if (entry.offset_expr.get()) {
94
96
  result += ", ";
@@ -33,6 +33,7 @@ public:
33
33
 
34
34
  public:
35
35
  //! Creates a new block inside the block manager
36
+ virtual unique_ptr<Block> ConvertBlock(block_id_t block_id, FileBuffer &source_buffer) = 0;
36
37
  virtual unique_ptr<Block> CreateBlock(block_id_t block_id, FileBuffer *source_buffer) = 0;
37
38
  //! Return the next free block id
38
39
  virtual block_id_t GetFreeBlockId() = 0;
@@ -20,6 +20,9 @@ public:
20
20
  using BlockManager::BlockManager;
21
21
 
22
22
  // LCOV_EXCL_START
23
+ unique_ptr<Block> ConvertBlock(block_id_t block_id, FileBuffer &source_buffer) override {
24
+ throw InternalException("Cannot perform IO in in-memory database!");
25
+ }
23
26
  unique_ptr<Block> CreateBlock(block_id_t block_id, FileBuffer *source_buffer) override {
24
27
  throw InternalException("Cannot perform IO in in-memory database!");
25
28
  }
@@ -44,7 +44,8 @@ struct PartialBlock {
44
44
  PartialBlockState state;
45
45
 
46
46
  public:
47
- virtual void Flush() = 0;
47
+ virtual void AddUninitializedRegion(idx_t start, idx_t end) = 0;
48
+ virtual void Flush(idx_t free_space_left) = 0;
48
49
  virtual void Clear() {
49
50
  }
50
51
  };
@@ -15,24 +15,32 @@
15
15
  #include "duckdb/common/unordered_set.hpp"
16
16
  #include "duckdb/common/set.hpp"
17
17
  #include "duckdb/common/vector.hpp"
18
+ #include "duckdb/main/config.hpp"
18
19
 
19
20
  namespace duckdb {
20
21
 
21
22
  class DatabaseInstance;
22
23
 
24
+ struct StorageManagerOptions {
25
+ bool read_only = false;
26
+ bool use_direct_io = false;
27
+ DebugInitialize debug_initialize = DebugInitialize::NO_INITIALIZE;
28
+ };
29
+
23
30
  //! SingleFileBlockManager is an implementation for a BlockManager which manages blocks in a single file
24
31
  class SingleFileBlockManager : public BlockManager {
25
32
  //! The location in the file where the block writing starts
26
33
  static constexpr uint64_t BLOCK_START = Storage::FILE_HEADER_SIZE * 3;
27
34
 
28
35
  public:
29
- SingleFileBlockManager(AttachedDatabase &db, string path, bool read_only, bool use_direct_io);
36
+ SingleFileBlockManager(AttachedDatabase &db, string path, StorageManagerOptions options);
30
37
 
31
38
  void GetFileFlags(uint8_t &flags, FileLockType &lock, bool create_new);
32
39
  void CreateNewDatabase();
33
40
  void LoadExistingDatabase();
34
41
 
35
42
  //! Creates a new Block using the specified block_id and returns a pointer
43
+ unique_ptr<Block> ConvertBlock(block_id_t block_id, FileBuffer &source_buffer) override;
36
44
  unique_ptr<Block> CreateBlock(block_id_t block_id, FileBuffer *source_buffer) override;
37
45
  //! Return the next free block id
38
46
  block_id_t GetFreeBlockId() override;
@@ -96,10 +104,8 @@ private:
96
104
  block_id_t free_list_id;
97
105
  //! The current header iteration count
98
106
  uint64_t iteration_count;
99
- //! Whether or not the db is opened in read-only mode
100
- bool read_only;
101
- //! Whether or not to use Direct IO to read the blocks
102
- bool use_direct_io;
107
+ //! The storage manager options
108
+ StorageManagerOptions options;
103
109
  //! Lock for performing various operations in the single file block manager
104
110
  mutex block_lock;
105
111
  };
@@ -54,6 +54,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
54
54
  DUCKDB_GLOBAL(DebugCheckpointAbort),
55
55
  DUCKDB_LOCAL(DebugForceExternal),
56
56
  DUCKDB_LOCAL(DebugForceNoCrossProduct),
57
+ DUCKDB_LOCAL(DebugAsOfIEJoin),
57
58
  DUCKDB_GLOBAL(DebugWindowMode),
58
59
  DUCKDB_GLOBAL_LOCAL(DefaultCollationSetting),
59
60
  DUCKDB_GLOBAL(DefaultOrderSetting),
@@ -359,4 +360,29 @@ bool DBConfig::operator!=(const DBConfig &other) {
359
360
  return !(other.options == options);
360
361
  }
361
362
 
363
+ OrderType DBConfig::ResolveOrder(OrderType order_type) const {
364
+ if (order_type != OrderType::ORDER_DEFAULT) {
365
+ return order_type;
366
+ }
367
+ return options.default_order_type;
368
+ }
369
+
370
+ OrderByNullType DBConfig::ResolveNullOrder(OrderType order_type, OrderByNullType null_type) const {
371
+ if (null_type != OrderByNullType::ORDER_DEFAULT) {
372
+ return null_type;
373
+ }
374
+ switch (options.default_null_order) {
375
+ case DefaultOrderByNullType::NULLS_FIRST:
376
+ return OrderByNullType::NULLS_FIRST;
377
+ case DefaultOrderByNullType::NULLS_LAST:
378
+ return OrderByNullType::NULLS_LAST;
379
+ case DefaultOrderByNullType::NULLS_FIRST_ON_ASC_LAST_ON_DESC:
380
+ return order_type == OrderType::ASCENDING ? OrderByNullType::NULLS_FIRST : OrderByNullType::NULLS_LAST;
381
+ case DefaultOrderByNullType::NULLS_LAST_ON_ASC_FIRST_ON_DESC:
382
+ return order_type == OrderType::ASCENDING ? OrderByNullType::NULLS_LAST : OrderByNullType::NULLS_FIRST;
383
+ default:
384
+ throw InternalException("Unknown null order setting");
385
+ }
386
+ }
387
+
362
388
  } // namespace duckdb