duckdb 0.7.2-dev2366.0 → 0.7.2-dev2430.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/file_buffer.cpp +8 -0
- package/src/duckdb/src/common/radix_partitioning.cpp +34 -0
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -124
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +144 -31
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +698 -0
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -1
- package/src/duckdb/src/function/scalar/list/list_sort.cpp +30 -56
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/debug_initialize.hpp +17 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -60
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +6 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +93 -0
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +13 -3
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +11 -5
- package/src/duckdb/src/main/config.cpp +26 -0
- package/src/duckdb/src/main/settings/settings.cpp +31 -8
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +2 -5
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +6 -14
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +2 -5
- package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -2
- package/src/duckdb/src/storage/meta_block_writer.cpp +4 -0
- package/src/duckdb/src/storage/partial_block_manager.cpp +11 -4
- package/src/duckdb/src/storage/single_file_block_manager.cpp +16 -9
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +5 -2
- package/src/duckdb/src/storage/storage_manager.cpp +7 -2
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +21 -1
- package/src/duckdb/ub_src_execution_operator_join.cpp +2 -0
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "duckdb/execution/operator/aggregate/physical_window.hpp"
|
2
|
+
#include "duckdb/execution/operator/join/physical_asof_join.hpp"
|
2
3
|
#include "duckdb/execution/operator/join/physical_iejoin.hpp"
|
3
4
|
#include "duckdb/execution/operator/projection/physical_projection.hpp"
|
4
5
|
#include "duckdb/execution/physical_plan_generator.hpp"
|
@@ -39,7 +40,11 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalAsOfJoin &
|
|
39
40
|
}
|
40
41
|
D_ASSERT(asof_idx < op.conditions.size());
|
41
42
|
|
42
|
-
|
43
|
+
if (!ClientConfig::GetConfig(context).force_asof_iejoin) {
|
44
|
+
return make_uniq<PhysicalAsOfJoin>(op, std::move(left), std::move(right));
|
45
|
+
}
|
46
|
+
|
47
|
+
// Debug implementation: IEJoin of Window
|
43
48
|
// LEAD(asof_column, 1, infinity) OVER (PARTITION BY equi_column... ORDER BY asof_column) AS asof_temp
|
44
49
|
auto &asof_comp = op.conditions[asof_idx];
|
45
50
|
auto &asof_column = asof_comp.right;
|
@@ -47,6 +52,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalAsOfJoin &
|
|
47
52
|
auto asof_temp = make_uniq<BoundWindowExpression>(ExpressionType::WINDOW_LEAD, asof_type, nullptr, nullptr);
|
48
53
|
asof_temp->children.emplace_back(asof_column->Copy());
|
49
54
|
asof_temp->offset_expr = make_uniq<BoundConstantExpression>(Value::BIGINT(1));
|
55
|
+
// TODO: If infinities are not supported for a type, fake them by looking at LHS statistics?
|
50
56
|
asof_temp->default_expr = make_uniq<BoundConstantExpression>(Value::Infinity(asof_type));
|
51
57
|
for (auto equi_idx : equi_indexes) {
|
52
58
|
asof_temp->partitions.emplace_back(op.conditions[equi_idx].right->Copy());
|
@@ -244,83 +244,57 @@ static unique_ptr<FunctionData> ListSortBind(ClientContext &context, ScalarFunct
|
|
244
244
|
return make_uniq<ListSortBindData>(order, null_order, bound_function.return_type, child_type, context);
|
245
245
|
}
|
246
246
|
|
247
|
-
|
248
|
-
|
249
|
-
if (!
|
250
|
-
throw InvalidInputException("
|
251
|
-
}
|
252
|
-
Value null_order_value = ExpressionExecutor::EvaluateScalar(context, *arguments[idx]);
|
253
|
-
auto null_order_name = StringUtil::Upper(null_order_value.ToString());
|
254
|
-
const auto null_order_arg = EnumSerializer::StringToEnum<OrderByNullType>(null_order_name.c_str());
|
255
|
-
switch (null_order_arg) {
|
256
|
-
case OrderByNullType::NULLS_FIRST:
|
257
|
-
case OrderByNullType::NULLS_LAST:
|
258
|
-
return null_order_arg;
|
259
|
-
case OrderByNullType::ORDER_DEFAULT:
|
260
|
-
return DBConfig::GetConfig(context).options.default_null_order;
|
261
|
-
default:
|
262
|
-
throw InvalidInputException("Null sorting order must be either NULLS FIRST, NULLS LAST or DEFAULT");
|
247
|
+
template <class T>
|
248
|
+
static T GetOrder(ClientContext &context, Expression &expr) {
|
249
|
+
if (!expr.IsFoldable()) {
|
250
|
+
throw InvalidInputException("Sorting order must be a constant");
|
263
251
|
}
|
252
|
+
Value order_value = ExpressionExecutor::EvaluateScalar(context, expr);
|
253
|
+
auto order_name = StringUtil::Upper(order_value.ToString());
|
254
|
+
return EnumSerializer::StringToEnum<T>(order_name.c_str());
|
264
255
|
}
|
265
256
|
|
266
257
|
static unique_ptr<FunctionData> ListNormalSortBind(ClientContext &context, ScalarFunction &bound_function,
|
267
258
|
vector<unique_ptr<Expression>> &arguments) {
|
268
|
-
|
269
|
-
D_ASSERT(!bound_function.arguments.empty() && bound_function.arguments.size() <= 3);
|
270
259
|
D_ASSERT(!arguments.empty() && arguments.size() <= 3);
|
271
|
-
|
272
|
-
|
273
|
-
auto &config = DBConfig::GetConfig(context);
|
274
|
-
auto order = config.options.default_order_type;
|
275
|
-
auto null_order = config.options.default_null_order;
|
260
|
+
auto order = OrderType::ORDER_DEFAULT;
|
261
|
+
auto null_order = OrderByNullType::ORDER_DEFAULT;
|
276
262
|
|
277
263
|
// get the sorting order
|
278
264
|
if (arguments.size() >= 2) {
|
279
|
-
|
280
|
-
if (!arguments[1]->IsFoldable()) {
|
281
|
-
throw InvalidInputException("Sorting order must be a constant");
|
282
|
-
}
|
283
|
-
Value order_value = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
|
284
|
-
|
285
|
-
const auto order_name = StringUtil::Upper(order_value.ToString());
|
286
|
-
const auto order_arg = EnumSerializer::StringToEnum<OrderType>(order_name.c_str());
|
287
|
-
switch (order_arg) {
|
288
|
-
case OrderType::ASCENDING:
|
289
|
-
case OrderType::DESCENDING:
|
290
|
-
order = order_arg;
|
291
|
-
break;
|
292
|
-
case OrderType::ORDER_DEFAULT:
|
293
|
-
break;
|
294
|
-
default:
|
295
|
-
throw InvalidInputException("Sorting order must be either ASC, DESC or DEFAULT");
|
296
|
-
}
|
265
|
+
order = GetOrder<OrderType>(context, *arguments[1]);
|
297
266
|
}
|
298
|
-
|
299
267
|
// get the null sorting order
|
300
268
|
if (arguments.size() == 3) {
|
301
|
-
null_order =
|
269
|
+
null_order = GetOrder<OrderByNullType>(context, *arguments[2]);
|
302
270
|
}
|
303
|
-
|
271
|
+
auto &config = DBConfig::GetConfig(context);
|
272
|
+
order = config.ResolveOrder(order);
|
273
|
+
null_order = config.ResolveNullOrder(order, null_order);
|
304
274
|
return ListSortBind(context, bound_function, arguments, order, null_order);
|
305
275
|
}
|
306
276
|
|
307
277
|
static unique_ptr<FunctionData> ListReverseSortBind(ClientContext &context, ScalarFunction &bound_function,
|
308
278
|
vector<unique_ptr<Expression>> &arguments) {
|
279
|
+
auto order = OrderType::ORDER_DEFAULT;
|
280
|
+
auto null_order = OrderByNullType::ORDER_DEFAULT;
|
309
281
|
|
310
|
-
D_ASSERT(bound_function.arguments.size() == 1 || bound_function.arguments.size() == 2);
|
311
|
-
D_ASSERT(arguments.size() == 1 || arguments.size() == 2);
|
312
|
-
|
313
|
-
// set (reverse) default values
|
314
|
-
auto &config = DBConfig::GetConfig(context);
|
315
|
-
auto order =
|
316
|
-
(config.options.default_order_type == OrderType::ASCENDING) ? OrderType::DESCENDING : OrderType::ASCENDING;
|
317
|
-
auto null_order = config.options.default_null_order;
|
318
|
-
|
319
|
-
// get the null sorting order
|
320
282
|
if (arguments.size() == 2) {
|
321
|
-
null_order =
|
283
|
+
null_order = GetOrder<OrderByNullType>(context, *arguments[1]);
|
322
284
|
}
|
323
|
-
|
285
|
+
auto &config = DBConfig::GetConfig(context);
|
286
|
+
order = config.ResolveOrder(order);
|
287
|
+
switch (order) {
|
288
|
+
case OrderType::ASCENDING:
|
289
|
+
order = OrderType::DESCENDING;
|
290
|
+
break;
|
291
|
+
case OrderType::DESCENDING:
|
292
|
+
order = OrderType::ASCENDING;
|
293
|
+
break;
|
294
|
+
default:
|
295
|
+
throw InternalException("Unexpected order type in list reverse sort");
|
296
|
+
}
|
297
|
+
null_order = config.ResolveNullOrder(order, null_order);
|
324
298
|
return ListSortBind(context, bound_function, arguments, order, null_order);
|
325
299
|
}
|
326
300
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.7.2-dev2366"
|
2
|
+
#define DUCKDB_VERSION "0.7.2-dev2430"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "f429595834"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -0,0 +1,17 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/common/enums/debug_initialize.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/constants.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
enum class DebugInitialize : uint8_t { NO_INITIALIZE = 0, DEBUG_ZERO_INITIALIZE = 1, DEBUG_ONE_INITIALIZE = 2 };
|
16
|
+
|
17
|
+
} // namespace duckdb
|
@@ -17,4 +17,12 @@ enum class OrderType : uint8_t { INVALID = 0, ORDER_DEFAULT = 1, ASCENDING = 2,
|
|
17
17
|
|
18
18
|
enum class OrderByNullType : uint8_t { INVALID = 0, ORDER_DEFAULT = 1, NULLS_FIRST = 2, NULLS_LAST = 3 };
|
19
19
|
|
20
|
+
enum class DefaultOrderByNullType : uint8_t {
|
21
|
+
INVALID = 0,
|
22
|
+
NULLS_FIRST = 2,
|
23
|
+
NULLS_LAST = 3,
|
24
|
+
NULLS_FIRST_ON_ASC_LAST_ON_DESC = 4,
|
25
|
+
NULLS_LAST_ON_ASC_FIRST_ON_DESC = 5
|
26
|
+
};
|
27
|
+
|
20
28
|
} // namespace duckdb
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/constants.hpp"
|
12
|
+
#include "duckdb/common/enums/debug_initialize.hpp"
|
12
13
|
|
13
14
|
namespace duckdb {
|
14
15
|
class Allocator;
|
@@ -62,6 +63,8 @@ public:
|
|
62
63
|
|
63
64
|
MemoryRequirement CalculateMemory(uint64_t user_size);
|
64
65
|
|
66
|
+
void Initialize(DebugInitialize info);
|
67
|
+
|
65
68
|
protected:
|
66
69
|
//! The pointer to the internal buffer that will be read or written, including the buffer header
|
67
70
|
data_ptr_t internal_buffer;
|
@@ -49,6 +49,9 @@ public:
|
|
49
49
|
//! Select using a cutoff on the radix bits of the hash
|
50
50
|
static idx_t Select(Vector &hashes, const SelectionVector *sel, idx_t count, idx_t radix_bits, idx_t cutoff,
|
51
51
|
SelectionVector *true_sel, SelectionVector *false_sel);
|
52
|
+
|
53
|
+
//! Convert hashes to bins
|
54
|
+
static void HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count);
|
52
55
|
};
|
53
56
|
|
54
57
|
//! Templated radix partitioning constants, can be templated to the number of radix bits
|
@@ -25,6 +25,8 @@ public:
|
|
25
25
|
PartitionGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
|
26
26
|
const Types &payload_types, bool external);
|
27
27
|
|
28
|
+
int ComparePartitions(const SBIterator &left, const SBIterator &right) const;
|
29
|
+
|
28
30
|
void ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask);
|
29
31
|
|
30
32
|
GlobalSortStatePtr global_sort;
|
@@ -43,8 +45,12 @@ public:
|
|
43
45
|
using GroupingPartition = unique_ptr<PartitionedColumnData>;
|
44
46
|
using GroupingAppend = unique_ptr<PartitionedColumnDataAppendState>;
|
45
47
|
|
46
|
-
|
47
|
-
|
48
|
+
static void GenerateOrderings(Orders &partitions, Orders &orders,
|
49
|
+
const vector<unique_ptr<Expression>> &partition_bys, const Orders &order_bys,
|
50
|
+
const vector<unique_ptr<BaseStatistics>> &partitions_stats);
|
51
|
+
|
52
|
+
PartitionGlobalSinkState(ClientContext &context, const vector<unique_ptr<Expression>> &partition_bys,
|
53
|
+
const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
|
48
54
|
const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
|
49
55
|
|
50
56
|
void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
|
@@ -68,6 +74,8 @@ public:
|
|
68
74
|
const Types payload_types;
|
69
75
|
vector<HashGroupPtr> hash_groups;
|
70
76
|
bool external;
|
77
|
+
// Reverse lookup from hash bins to non-empty hash groups
|
78
|
+
vector<size_t> bin_groups;
|
71
79
|
|
72
80
|
// OVER() (no sorting)
|
73
81
|
unique_ptr<RowDataCollection> rows;
|
@@ -121,7 +129,7 @@ class PartitionGlobalMergeState {
|
|
121
129
|
public:
|
122
130
|
using GroupDataPtr = unique_ptr<ColumnDataCollection>;
|
123
131
|
|
124
|
-
|
132
|
+
PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);
|
125
133
|
|
126
134
|
bool IsSorted() const {
|
127
135
|
lock_guard<mutex> guard(lock);
|
@@ -187,61 +195,4 @@ public:
|
|
187
195
|
void Schedule() override;
|
188
196
|
};
|
189
197
|
|
190
|
-
class PartitionGlobalSourceState {
|
191
|
-
public:
|
192
|
-
explicit PartitionGlobalSourceState(PartitionGlobalSinkState &gsink_p) : gsink(gsink_p), next_bin(0) {
|
193
|
-
}
|
194
|
-
|
195
|
-
PartitionGlobalSinkState &gsink;
|
196
|
-
//! The output read position.
|
197
|
-
atomic<idx_t> next_bin;
|
198
|
-
|
199
|
-
public:
|
200
|
-
idx_t MaxThreads() {
|
201
|
-
// If there is only one partition, we have to process it on one thread.
|
202
|
-
if (!gsink.grouping_data) {
|
203
|
-
return 1;
|
204
|
-
}
|
205
|
-
|
206
|
-
// If there is not a lot of data, process serially.
|
207
|
-
if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
|
208
|
-
return 1;
|
209
|
-
}
|
210
|
-
|
211
|
-
return gsink.hash_groups.size();
|
212
|
-
}
|
213
|
-
};
|
214
|
-
|
215
|
-
// Per-thread read state
|
216
|
-
class PartitionLocalSourceState {
|
217
|
-
public:
|
218
|
-
using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
|
219
|
-
|
220
|
-
explicit PartitionLocalSourceState(PartitionGlobalSinkState &gstate_p);
|
221
|
-
|
222
|
-
void MaterializeSortedData();
|
223
|
-
idx_t GeneratePartition(const idx_t hash_bin);
|
224
|
-
|
225
|
-
PartitionGlobalSinkState &gstate;
|
226
|
-
|
227
|
-
//! The read partition
|
228
|
-
idx_t hash_bin;
|
229
|
-
HashGroupPtr hash_group;
|
230
|
-
|
231
|
-
//! The generated input chunks
|
232
|
-
unique_ptr<RowDataCollection> rows;
|
233
|
-
unique_ptr<RowDataCollection> heap;
|
234
|
-
RowLayout layout;
|
235
|
-
//! The partition boundary mask
|
236
|
-
vector<validity_t> partition_bits;
|
237
|
-
ValidityMask partition_mask;
|
238
|
-
//! The order boundary mask
|
239
|
-
vector<validity_t> order_bits;
|
240
|
-
ValidityMask order_mask;
|
241
|
-
//! The read cursor
|
242
|
-
unique_ptr<RowDataCollectionScanner> scanner;
|
243
|
-
//! Buffer for the inputs
|
244
|
-
DataChunk input_chunk;
|
245
|
-
};
|
246
|
-
|
247
198
|
} // namespace duckdb
|
@@ -29,7 +29,7 @@ struct OuterJoinLocalScanState {
|
|
29
29
|
|
30
30
|
class OuterJoinMarker {
|
31
31
|
public:
|
32
|
-
OuterJoinMarker(bool enabled);
|
32
|
+
explicit OuterJoinMarker(bool enabled);
|
33
33
|
|
34
34
|
bool Enabled() {
|
35
35
|
return enabled;
|
@@ -60,6 +60,11 @@ public:
|
|
60
60
|
//! Perform the scan
|
61
61
|
void Scan(OuterJoinGlobalScanState &gstate, OuterJoinLocalScanState &lstate, DataChunk &result);
|
62
62
|
|
63
|
+
//! Read-only matches vector
|
64
|
+
const bool *GetMatches() const {
|
65
|
+
return found_match.get();
|
66
|
+
}
|
67
|
+
|
63
68
|
private:
|
64
69
|
bool enabled;
|
65
70
|
unique_ptr<bool[]> found_match;
|
@@ -0,0 +1,93 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/operator/join/physical_asof_join.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/execution/operator/join/physical_comparison_join.hpp"
|
12
|
+
#include "duckdb/planner/bound_result_modifier.hpp"
|
13
|
+
|
14
|
+
namespace duckdb {
|
15
|
+
|
16
|
+
//! PhysicalAsOfJoin represents a piecewise merge loop join between
|
17
|
+
//! two tables
|
18
|
+
class PhysicalAsOfJoin : public PhysicalComparisonJoin {
|
19
|
+
public:
|
20
|
+
static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::ASOF_JOIN;
|
21
|
+
|
22
|
+
public:
|
23
|
+
PhysicalAsOfJoin(LogicalComparisonJoin &op, unique_ptr<PhysicalOperator> left, unique_ptr<PhysicalOperator> right);
|
24
|
+
|
25
|
+
vector<LogicalType> join_key_types;
|
26
|
+
vector<column_t> null_sensitive;
|
27
|
+
|
28
|
+
// Equalities
|
29
|
+
vector<unique_ptr<Expression>> lhs_partitions;
|
30
|
+
vector<unique_ptr<Expression>> rhs_partitions;
|
31
|
+
|
32
|
+
// Inequality Only
|
33
|
+
vector<BoundOrderByNode> lhs_orders;
|
34
|
+
vector<BoundOrderByNode> rhs_orders;
|
35
|
+
|
36
|
+
// Projection mappings
|
37
|
+
vector<column_t> right_projection_map;
|
38
|
+
|
39
|
+
public:
|
40
|
+
// Operator Interface
|
41
|
+
unique_ptr<GlobalOperatorState> GetGlobalOperatorState(ClientContext &context) const override;
|
42
|
+
unique_ptr<OperatorState> GetOperatorState(ExecutionContext &context) const override;
|
43
|
+
|
44
|
+
bool ParallelOperator() const override {
|
45
|
+
return true;
|
46
|
+
}
|
47
|
+
|
48
|
+
protected:
|
49
|
+
// CachingOperator Interface
|
50
|
+
OperatorResultType ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
|
51
|
+
GlobalOperatorState &gstate, OperatorState &state) const override;
|
52
|
+
|
53
|
+
public:
|
54
|
+
// Source interface
|
55
|
+
unique_ptr<LocalSourceState> GetLocalSourceState(ExecutionContext &context,
|
56
|
+
GlobalSourceState &gstate) const override;
|
57
|
+
unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;
|
58
|
+
void GetData(ExecutionContext &context, DataChunk &chunk, GlobalSourceState &gstate,
|
59
|
+
LocalSourceState &lstate) const override;
|
60
|
+
|
61
|
+
bool IsSource() const override {
|
62
|
+
return IsRightOuterJoin(join_type);
|
63
|
+
}
|
64
|
+
bool ParallelSource() const override {
|
65
|
+
return true;
|
66
|
+
}
|
67
|
+
|
68
|
+
public:
|
69
|
+
// Sink Interface
|
70
|
+
unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
|
71
|
+
unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
|
72
|
+
SinkResultType Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate,
|
73
|
+
DataChunk &input) const override;
|
74
|
+
void Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const override;
|
75
|
+
SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
76
|
+
GlobalSinkState &gstate) const override;
|
77
|
+
|
78
|
+
bool IsSink() const override {
|
79
|
+
return true;
|
80
|
+
}
|
81
|
+
bool ParallelSink() const override {
|
82
|
+
return true;
|
83
|
+
}
|
84
|
+
|
85
|
+
private:
|
86
|
+
// resolve joins that output max N elements (SEMI, ANTI, MARK)
|
87
|
+
void ResolveSimpleJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk, OperatorState &state) const;
|
88
|
+
// resolve joins that can potentially output N*M elements (INNER, LEFT, FULL)
|
89
|
+
OperatorResultType ResolveComplexJoin(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
|
90
|
+
OperatorState &state) const;
|
91
|
+
};
|
92
|
+
|
93
|
+
} // namespace duckdb
|
@@ -199,7 +199,7 @@ public:
|
|
199
199
|
~CachingOperatorState() override {
|
200
200
|
}
|
201
201
|
|
202
|
-
|
202
|
+
void Finalize(const PhysicalOperator &op, ExecutionContext &context) override {
|
203
203
|
}
|
204
204
|
|
205
205
|
unique_ptr<DataChunk> cached_chunk;
|
@@ -71,6 +71,8 @@ struct ClientConfig {
|
|
71
71
|
bool force_external = false;
|
72
72
|
//! Force disable cross product generation when hyper graph isn't connected, used for testing
|
73
73
|
bool force_no_cross_product = false;
|
74
|
+
//! Force use of IEJoin to implement AsOfJoin, used for testing
|
75
|
+
bool force_asof_iejoin = false;
|
74
76
|
//! If this context should also try to use the available replacement scans
|
75
77
|
//! True by default
|
76
78
|
bool use_replacement_scans = true;
|
@@ -106,8 +106,8 @@ struct DBConfigOptions {
|
|
106
106
|
string collation = string();
|
107
107
|
//! The order type used when none is specified (default: ASC)
|
108
108
|
OrderType default_order_type = OrderType::ASCENDING;
|
109
|
-
//! Null ordering used when none is specified (default: NULLS
|
110
|
-
|
109
|
+
//! Null ordering used when none is specified (default: NULLS LAST)
|
110
|
+
DefaultOrderByNullType default_null_order = DefaultOrderByNullType::NULLS_LAST;
|
111
111
|
//! enable COPY and related commands
|
112
112
|
bool enable_external_access = true;
|
113
113
|
//! Whether or not object cache is used
|
@@ -145,6 +145,8 @@ struct DBConfigOptions {
|
|
145
145
|
bool enable_fsst_vectors = false;
|
146
146
|
//! Start transactions immediately in all attached databases - instead of lazily when a database is referenced
|
147
147
|
bool immediate_transaction_mode = false;
|
148
|
+
//! Debug setting - how to initialize blocks in the storage layer when allocating
|
149
|
+
DebugInitialize debug_initialize = DebugInitialize::NO_INITIALIZE;
|
148
150
|
//! The set of unrecognized (other) options
|
149
151
|
unordered_map<string, Value> unrecognized_options;
|
150
152
|
|
@@ -226,6 +228,9 @@ public:
|
|
226
228
|
void SetDefaultMaxThreads();
|
227
229
|
void SetDefaultMaxMemory();
|
228
230
|
|
231
|
+
OrderType ResolveOrder(OrderType order_type) const;
|
232
|
+
OrderByNullType ResolveNullOrder(OrderType order_type, OrderByNullType null_type) const;
|
233
|
+
|
229
234
|
private:
|
230
235
|
unique_ptr<CompressionFunctionSet> compression_functions;
|
231
236
|
unique_ptr<CastFunctionSet> cast_functions;
|
@@ -66,9 +66,19 @@ struct DebugForceNoCrossProduct {
|
|
66
66
|
};
|
67
67
|
|
68
68
|
struct OrderedAggregateThreshold {
|
69
|
-
static constexpr const char *Name = "ordered_aggregate_threshold";
|
70
|
-
static constexpr const char *Description =
|
71
|
-
|
69
|
+
static constexpr const char *Name = "ordered_aggregate_threshold"; // NOLINT
|
70
|
+
static constexpr const char *Description = // NOLINT
|
71
|
+
"the number of rows to accumulate before sorting, used for tuning";
|
72
|
+
static constexpr const LogicalTypeId InputType = LogicalTypeId::UBIGINT; // NOLINT
|
73
|
+
static void SetLocal(ClientContext &context, const Value &parameter);
|
74
|
+
static void ResetLocal(ClientContext &context);
|
75
|
+
static Value GetSetting(ClientContext &context);
|
76
|
+
};
|
77
|
+
|
78
|
+
struct DebugAsOfIEJoin {
|
79
|
+
static constexpr const char *Name = "debug_asof_iejoin"; // NOLINT
|
80
|
+
static constexpr const char *Description = "DEBUG SETTING: force use of IEJoin to implement AsOf joins"; // NOLINT
|
81
|
+
static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN; // NOLINT
|
72
82
|
static void SetLocal(ClientContext &context, const Value &parameter);
|
73
83
|
static void ResetLocal(ClientContext &context);
|
74
84
|
static Value GetSetting(ClientContext &context);
|
@@ -87,8 +87,10 @@ public:
|
|
87
87
|
// Start with function call
|
88
88
|
string result = schema.empty() ? function_name : schema + "." + function_name;
|
89
89
|
result += "(";
|
90
|
-
|
91
|
-
|
90
|
+
if (entry.children.size()) {
|
91
|
+
result += StringUtil::Join(entry.children, entry.children.size(), ", ",
|
92
|
+
[](const unique_ptr<BASE> &child) { return child->ToString(); });
|
93
|
+
}
|
92
94
|
// Lead/Lag extra arguments
|
93
95
|
if (entry.offset_expr.get()) {
|
94
96
|
result += ", ";
|
@@ -33,6 +33,7 @@ public:
|
|
33
33
|
|
34
34
|
public:
|
35
35
|
//! Creates a new block inside the block manager
|
36
|
+
virtual unique_ptr<Block> ConvertBlock(block_id_t block_id, FileBuffer &source_buffer) = 0;
|
36
37
|
virtual unique_ptr<Block> CreateBlock(block_id_t block_id, FileBuffer *source_buffer) = 0;
|
37
38
|
//! Return the next free block id
|
38
39
|
virtual block_id_t GetFreeBlockId() = 0;
|
@@ -20,6 +20,9 @@ public:
|
|
20
20
|
using BlockManager::BlockManager;
|
21
21
|
|
22
22
|
// LCOV_EXCL_START
|
23
|
+
unique_ptr<Block> ConvertBlock(block_id_t block_id, FileBuffer &source_buffer) override {
|
24
|
+
throw InternalException("Cannot perform IO in in-memory database!");
|
25
|
+
}
|
23
26
|
unique_ptr<Block> CreateBlock(block_id_t block_id, FileBuffer *source_buffer) override {
|
24
27
|
throw InternalException("Cannot perform IO in in-memory database!");
|
25
28
|
}
|
@@ -15,24 +15,32 @@
|
|
15
15
|
#include "duckdb/common/unordered_set.hpp"
|
16
16
|
#include "duckdb/common/set.hpp"
|
17
17
|
#include "duckdb/common/vector.hpp"
|
18
|
+
#include "duckdb/main/config.hpp"
|
18
19
|
|
19
20
|
namespace duckdb {
|
20
21
|
|
21
22
|
class DatabaseInstance;
|
22
23
|
|
24
|
+
struct StorageManagerOptions {
|
25
|
+
bool read_only = false;
|
26
|
+
bool use_direct_io = false;
|
27
|
+
DebugInitialize debug_initialize = DebugInitialize::NO_INITIALIZE;
|
28
|
+
};
|
29
|
+
|
23
30
|
//! SingleFileBlockManager is an implementation for a BlockManager which manages blocks in a single file
|
24
31
|
class SingleFileBlockManager : public BlockManager {
|
25
32
|
//! The location in the file where the block writing starts
|
26
33
|
static constexpr uint64_t BLOCK_START = Storage::FILE_HEADER_SIZE * 3;
|
27
34
|
|
28
35
|
public:
|
29
|
-
SingleFileBlockManager(AttachedDatabase &db, string path,
|
36
|
+
SingleFileBlockManager(AttachedDatabase &db, string path, StorageManagerOptions options);
|
30
37
|
|
31
38
|
void GetFileFlags(uint8_t &flags, FileLockType &lock, bool create_new);
|
32
39
|
void CreateNewDatabase();
|
33
40
|
void LoadExistingDatabase();
|
34
41
|
|
35
42
|
//! Creates a new Block using the specified block_id and returns a pointer
|
43
|
+
unique_ptr<Block> ConvertBlock(block_id_t block_id, FileBuffer &source_buffer) override;
|
36
44
|
unique_ptr<Block> CreateBlock(block_id_t block_id, FileBuffer *source_buffer) override;
|
37
45
|
//! Return the next free block id
|
38
46
|
block_id_t GetFreeBlockId() override;
|
@@ -96,10 +104,8 @@ private:
|
|
96
104
|
block_id_t free_list_id;
|
97
105
|
//! The current header iteration count
|
98
106
|
uint64_t iteration_count;
|
99
|
-
//!
|
100
|
-
|
101
|
-
//! Whether or not to use Direct IO to read the blocks
|
102
|
-
bool use_direct_io;
|
107
|
+
//! The storage manager options
|
108
|
+
StorageManagerOptions options;
|
103
109
|
//! Lock for performing various operations in the single file block manager
|
104
110
|
mutex block_lock;
|
105
111
|
};
|
@@ -54,6 +54,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
|
|
54
54
|
DUCKDB_GLOBAL(DebugCheckpointAbort),
|
55
55
|
DUCKDB_LOCAL(DebugForceExternal),
|
56
56
|
DUCKDB_LOCAL(DebugForceNoCrossProduct),
|
57
|
+
DUCKDB_LOCAL(DebugAsOfIEJoin),
|
57
58
|
DUCKDB_GLOBAL(DebugWindowMode),
|
58
59
|
DUCKDB_GLOBAL_LOCAL(DefaultCollationSetting),
|
59
60
|
DUCKDB_GLOBAL(DefaultOrderSetting),
|
@@ -359,4 +360,29 @@ bool DBConfig::operator!=(const DBConfig &other) {
|
|
359
360
|
return !(other.options == options);
|
360
361
|
}
|
361
362
|
|
363
|
+
OrderType DBConfig::ResolveOrder(OrderType order_type) const {
|
364
|
+
if (order_type != OrderType::ORDER_DEFAULT) {
|
365
|
+
return order_type;
|
366
|
+
}
|
367
|
+
return options.default_order_type;
|
368
|
+
}
|
369
|
+
|
370
|
+
OrderByNullType DBConfig::ResolveNullOrder(OrderType order_type, OrderByNullType null_type) const {
|
371
|
+
if (null_type != OrderByNullType::ORDER_DEFAULT) {
|
372
|
+
return null_type;
|
373
|
+
}
|
374
|
+
switch (options.default_null_order) {
|
375
|
+
case DefaultOrderByNullType::NULLS_FIRST:
|
376
|
+
return OrderByNullType::NULLS_FIRST;
|
377
|
+
case DefaultOrderByNullType::NULLS_LAST:
|
378
|
+
return OrderByNullType::NULLS_LAST;
|
379
|
+
case DefaultOrderByNullType::NULLS_FIRST_ON_ASC_LAST_ON_DESC:
|
380
|
+
return order_type == OrderType::ASCENDING ? OrderByNullType::NULLS_FIRST : OrderByNullType::NULLS_LAST;
|
381
|
+
case DefaultOrderByNullType::NULLS_LAST_ON_ASC_FIRST_ON_DESC:
|
382
|
+
return order_type == OrderType::ASCENDING ? OrderByNullType::NULLS_LAST : OrderByNullType::NULLS_FIRST;
|
383
|
+
default:
|
384
|
+
throw InternalException("Unknown null order setting");
|
385
|
+
}
|
386
|
+
}
|
387
|
+
|
362
388
|
} // namespace duckdb
|