duckdb 0.8.2-dev2700.0 → 0.8.2-dev2809.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-makedate.cpp +12 -6
- package/src/duckdb/src/common/adbc/adbc.cpp +52 -21
- package/src/duckdb/src/common/adbc/driver_manager.cpp +12 -2
- package/src/duckdb/src/common/enum_util.cpp +5 -0
- package/src/duckdb/src/common/types/row/row_data_collection_scanner.cpp +35 -5
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +283 -91
- package/src/duckdb/src/execution/operator/filter/physical_filter.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -2
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -6
- package/src/duckdb/src/execution/window_executor.cpp +10 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +5 -2
- package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/enums/pending_execution_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +5 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +37 -63
- package/src/duckdb/src/include/duckdb/optimizer/join_order/cost_model.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_node.hpp +14 -29
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +7 -21
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +0 -11
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +89 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +17 -31
- package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph_manager.hpp +113 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +73 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +73 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -1
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +0 -2
- package/src/duckdb/src/include/duckdb.h +11 -1
- package/src/duckdb/src/main/capi/pending-c.cpp +17 -0
- package/src/duckdb/src/main/pending_query_result.cpp +9 -1
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +79 -325
- package/src/duckdb/src/optimizer/join_order/cost_model.cpp +19 -0
- package/src/duckdb/src/optimizer/join_order/join_node.cpp +5 -37
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +48 -1078
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +552 -0
- package/src/duckdb/src/optimizer/join_order/query_graph.cpp +32 -29
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +409 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +356 -0
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +351 -0
- package/src/duckdb/src/parallel/executor.cpp +6 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +7 -0
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_get.cpp +4 -0
- package/src/duckdb/ub_src_optimizer_join_order.cpp +10 -0
package/package.json
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
#include "duckdb/common/operator/add.hpp"
|
1
2
|
#include "duckdb/common/operator/cast_operators.hpp"
|
3
|
+
#include "duckdb/common/operator/subtract.hpp"
|
2
4
|
#include "duckdb/common/types/date.hpp"
|
3
5
|
#include "duckdb/common/types/time.hpp"
|
4
6
|
#include "duckdb/common/types/timestamp.hpp"
|
@@ -66,7 +68,11 @@ struct ICUMakeDate : public ICUDateFunc {
|
|
66
68
|
struct ICUMakeTimestampTZFunc : public ICUDateFunc {
|
67
69
|
template <typename T>
|
68
70
|
static inline timestamp_t Operation(icu::Calendar *calendar, T yyyy, T mm, T dd, T hr, T mn, double ss) {
|
69
|
-
const auto year =
|
71
|
+
const auto year = Cast::Operation<T, int32_t>(AddOperator::Operation<T, T, T>(yyyy, (yyyy < 0)));
|
72
|
+
const auto month = Cast::Operation<T, int32_t>(SubtractOperatorOverflowCheck::Operation<T, T, T>(mm, 1));
|
73
|
+
const auto day = Cast::Operation<T, int32_t>(dd);
|
74
|
+
const auto hour = Cast::Operation<T, int32_t>(hr);
|
75
|
+
const auto min = Cast::Operation<T, int32_t>(mn);
|
70
76
|
|
71
77
|
const auto secs = Cast::Operation<double, int32_t>(ss);
|
72
78
|
ss -= secs;
|
@@ -74,11 +80,11 @@ struct ICUMakeTimestampTZFunc : public ICUDateFunc {
|
|
74
80
|
const auto millis = int32_t(ss);
|
75
81
|
int64_t micros = std::round((ss - millis) * Interval::MICROS_PER_MSEC);
|
76
82
|
|
77
|
-
calendar->set(UCAL_YEAR,
|
78
|
-
calendar->set(UCAL_MONTH,
|
79
|
-
calendar->set(UCAL_DATE,
|
80
|
-
calendar->set(UCAL_HOUR_OF_DAY,
|
81
|
-
calendar->set(UCAL_MINUTE,
|
83
|
+
calendar->set(UCAL_YEAR, year);
|
84
|
+
calendar->set(UCAL_MONTH, month);
|
85
|
+
calendar->set(UCAL_DATE, day);
|
86
|
+
calendar->set(UCAL_HOUR_OF_DAY, hour);
|
87
|
+
calendar->set(UCAL_MINUTE, min);
|
82
88
|
calendar->set(UCAL_SECOND, secs);
|
83
89
|
calendar->set(UCAL_MILLISECOND, millis);
|
84
90
|
|
@@ -586,16 +586,14 @@ AdbcStatusCode StatementExecuteQuery(struct AdbcStatement *statement, struct Arr
|
|
586
586
|
|
587
587
|
// this is a nop for us
|
588
588
|
AdbcStatusCode StatementPrepare(struct AdbcStatement *statement, struct AdbcError *error) {
|
589
|
-
|
590
|
-
|
591
|
-
return
|
589
|
+
if (!statement) {
|
590
|
+
SetError(error, "Missing statement object");
|
591
|
+
return ADBC_STATUS_INVALID_ARGUMENT;
|
592
592
|
}
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
return status;
|
593
|
+
if (!error) {
|
594
|
+
SetError(error, "Missing error object");
|
595
|
+
return ADBC_STATUS_INVALID_ARGUMENT;
|
597
596
|
}
|
598
|
-
|
599
597
|
return ADBC_STATUS_OK;
|
600
598
|
}
|
601
599
|
|
@@ -687,10 +685,6 @@ AdbcStatusCode QueryInternal(struct AdbcConnection *connection, struct ArrowArra
|
|
687
685
|
AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth, const char *catalog,
|
688
686
|
const char *db_schema, const char *table_name, const char **table_type,
|
689
687
|
const char *column_name, struct ArrowArrayStream *out, struct AdbcError *error) {
|
690
|
-
if (depth != 0) {
|
691
|
-
SetError(error, "Depth parameter not yet supported");
|
692
|
-
return ADBC_STATUS_NOT_IMPLEMENTED;
|
693
|
-
}
|
694
688
|
if (catalog != nullptr) {
|
695
689
|
if (strcmp(catalog, "duckdb") == 0) {
|
696
690
|
SetError(error, "catalog must be NULL or 'duckdb'");
|
@@ -702,16 +696,53 @@ AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *connection, int depth
|
|
702
696
|
SetError(error, "Table types parameter not yet supported");
|
703
697
|
return ADBC_STATUS_NOT_IMPLEMENTED;
|
704
698
|
}
|
699
|
+
std::string query;
|
700
|
+
switch (depth) {
|
701
|
+
case ADBC_OBJECT_DEPTH_CATALOGS:
|
702
|
+
SetError(error, "ADBC_OBJECT_DEPTH_CATALOGS not yet supported");
|
703
|
+
return ADBC_STATUS_NOT_IMPLEMENTED;
|
704
|
+
case ADBC_OBJECT_DEPTH_DB_SCHEMAS:
|
705
|
+
// Return metadata on catalogs and schemas.
|
706
|
+
query = duckdb::StringUtil::Format(R"(
|
707
|
+
SELECT table_schema db_schema_name
|
708
|
+
FROM information_schema.columns
|
709
|
+
WHERE table_schema LIKE '%s' AND table_name LIKE '%s' AND column_name LIKE '%s' ;
|
710
|
+
)",
|
711
|
+
db_schema ? db_schema : "%", table_name ? table_name : "%",
|
712
|
+
column_name ? column_name : "%");
|
713
|
+
break;
|
714
|
+
case ADBC_OBJECT_DEPTH_TABLES:
|
715
|
+
// Return metadata on catalogs, schemas, and tables.
|
716
|
+
query = duckdb::StringUtil::Format(R"(
|
717
|
+
SELECT table_schema db_schema_name, LIST(table_schema_list) db_schema_tables
|
718
|
+
FROM (
|
719
|
+
SELECT table_schema, { table_name : table_name} table_schema_list
|
720
|
+
FROM information_schema.columns
|
721
|
+
WHERE table_schema LIKE '%s' AND table_name LIKE '%s' AND column_name LIKE '%s' GROUP BY table_schema, table_name
|
722
|
+
) GROUP BY table_schema;
|
723
|
+
)",
|
724
|
+
db_schema ? db_schema : "%", table_name ? table_name : "%",
|
725
|
+
column_name ? column_name : "%");
|
726
|
+
break;
|
727
|
+
case ADBC_OBJECT_DEPTH_COLUMNS:
|
728
|
+
// Return metadata on catalogs, schemas, tables, and columns.
|
729
|
+
query = duckdb::StringUtil::Format(R"(
|
730
|
+
SELECT table_schema db_schema_name, LIST(table_schema_list) db_schema_tables
|
731
|
+
FROM (
|
732
|
+
SELECT table_schema, { table_name : table_name, table_columns : LIST({column_name : column_name, ordinal_position : ordinal_position + 1, remarks : ''})} table_schema_list
|
733
|
+
FROM information_schema.columns
|
734
|
+
WHERE table_schema LIKE '%s' AND table_name LIKE '%s' AND column_name LIKE '%s' GROUP BY table_schema, table_name
|
735
|
+
) GROUP BY table_schema;
|
736
|
+
)",
|
737
|
+
db_schema ? db_schema : "%", table_name ? table_name : "%",
|
738
|
+
column_name ? column_name : "%");
|
739
|
+
break;
|
740
|
+
default:
|
741
|
+
SetError(error, "Invalid value of Depth");
|
742
|
+
return ADBC_STATUS_INVALID_ARGUMENT;
|
743
|
+
}
|
705
744
|
|
706
|
-
|
707
|
-
SELECT table_schema db_schema_name, LIST(table_schema_list) db_schema_tables FROM (
|
708
|
-
SELECT table_schema, { table_name : table_name, table_columns : LIST({column_name : column_name, ordinal_position : ordinal_position + 1, remarks : ''})} table_schema_list FROM information_schema.columns WHERE table_schema LIKE '%s' AND table_name LIKE '%s' AND column_name LIKE '%s' GROUP BY table_schema, table_name
|
709
|
-
) GROUP BY table_schema;
|
710
|
-
)",
|
711
|
-
db_schema ? db_schema : "%", table_name ? table_name : "%",
|
712
|
-
column_name ? column_name : "%");
|
713
|
-
|
714
|
-
return QueryInternal(connection, out, q.c_str(), error);
|
745
|
+
return QueryInternal(connection, out, query.c_str(), error);
|
715
746
|
}
|
716
747
|
|
717
748
|
AdbcStatusCode ConnectionGetTableTypes(struct AdbcConnection *connection, struct ArrowArrayStream *out,
|
@@ -304,7 +304,12 @@ AdbcStatusCode AdbcConnectionGetObjects(struct AdbcConnection *connection, int d
|
|
304
304
|
const char *db_schema, const char *table_name, const char **table_types,
|
305
305
|
const char *column_name, struct ArrowArrayStream *stream,
|
306
306
|
struct AdbcError *error) {
|
307
|
-
if (!connection
|
307
|
+
if (!connection) {
|
308
|
+
SetError(error, "connection can't be null");
|
309
|
+
return ADBC_STATUS_INVALID_STATE;
|
310
|
+
}
|
311
|
+
if (!connection->private_data) {
|
312
|
+
SetError(error, "connection must be initialized");
|
308
313
|
return ADBC_STATUS_INVALID_STATE;
|
309
314
|
}
|
310
315
|
return connection->private_driver->ConnectionGetObjects(connection, depth, catalog, db_schema, table_name,
|
@@ -474,7 +479,12 @@ AdbcStatusCode AdbcStatementNew(struct AdbcConnection *connection, struct AdbcSt
|
|
474
479
|
}
|
475
480
|
|
476
481
|
AdbcStatusCode AdbcStatementPrepare(struct AdbcStatement *statement, struct AdbcError *error) {
|
477
|
-
|
482
|
+
auto status = SetErrorMaybe(statement, error, "Missing statement object");
|
483
|
+
if (status != ADBC_STATUS_OK) {
|
484
|
+
return status;
|
485
|
+
}
|
486
|
+
status = SetErrorMaybe(statement->private_data, error, "Invalid statement object");
|
487
|
+
if (status != ADBC_STATUS_OK) {
|
478
488
|
return ADBC_STATUS_INVALID_STATE;
|
479
489
|
}
|
480
490
|
return statement->private_driver->StatementPrepare(statement, error);
|
@@ -3872,6 +3872,8 @@ const char* EnumUtil::ToChars<PendingExecutionResult>(PendingExecutionResult val
|
|
3872
3872
|
return "RESULT_NOT_READY";
|
3873
3873
|
case PendingExecutionResult::EXECUTION_ERROR:
|
3874
3874
|
return "EXECUTION_ERROR";
|
3875
|
+
case PendingExecutionResult::NO_TASKS_AVAILABLE:
|
3876
|
+
return "NO_TASKS_AVAILABLE";
|
3875
3877
|
default:
|
3876
3878
|
throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
|
3877
3879
|
}
|
@@ -3888,6 +3890,9 @@ PendingExecutionResult EnumUtil::FromString<PendingExecutionResult>(const char *
|
|
3888
3890
|
if (StringUtil::Equals(value, "EXECUTION_ERROR")) {
|
3889
3891
|
return PendingExecutionResult::EXECUTION_ERROR;
|
3890
3892
|
}
|
3893
|
+
if (StringUtil::Equals(value, "NO_TASKS_AVAILABLE")) {
|
3894
|
+
return PendingExecutionResult::NO_TASKS_AVAILABLE;
|
3895
|
+
}
|
3891
3896
|
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
|
3892
3897
|
}
|
3893
3898
|
|
@@ -4,6 +4,8 @@
|
|
4
4
|
#include "duckdb/common/types/row/row_data_collection.hpp"
|
5
5
|
#include "duckdb/storage/buffer_manager.hpp"
|
6
6
|
|
7
|
+
#include <numeric>
|
8
|
+
|
7
9
|
namespace duckdb {
|
8
10
|
|
9
11
|
void RowDataCollectionScanner::AlignHeapBlocks(RowDataCollection &swizzled_block_collection,
|
@@ -155,6 +157,31 @@ RowDataCollectionScanner::RowDataCollectionScanner(RowDataCollection &rows_p, Ro
|
|
155
157
|
ValidateUnscannedBlock();
|
156
158
|
}
|
157
159
|
|
160
|
+
RowDataCollectionScanner::RowDataCollectionScanner(RowDataCollection &rows_p, RowDataCollection &heap_p,
|
161
|
+
const RowLayout &layout_p, bool external_p, idx_t block_idx,
|
162
|
+
bool flush_p)
|
163
|
+
: rows(rows_p), heap(heap_p), layout(layout_p), read_state(*this), total_count(rows.count), total_scanned(0),
|
164
|
+
external(external_p), flush(flush_p), unswizzling(!layout.AllConstant() && external && !heap.keep_pinned) {
|
165
|
+
|
166
|
+
if (unswizzling) {
|
167
|
+
D_ASSERT(rows.blocks.size() == heap.blocks.size());
|
168
|
+
}
|
169
|
+
|
170
|
+
D_ASSERT(block_idx < rows.blocks.size());
|
171
|
+
read_state.block_idx = block_idx;
|
172
|
+
read_state.entry_idx = 0;
|
173
|
+
|
174
|
+
// Pretend that we have scanned up to the start block
|
175
|
+
// and will stop at the end
|
176
|
+
auto begin = rows.blocks.begin();
|
177
|
+
auto end = begin + block_idx;
|
178
|
+
total_scanned =
|
179
|
+
std::accumulate(begin, end, idx_t(0), [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
|
180
|
+
total_count = total_scanned + (*end)->count;
|
181
|
+
|
182
|
+
ValidateUnscannedBlock();
|
183
|
+
}
|
184
|
+
|
158
185
|
void RowDataCollectionScanner::SwizzleBlock(RowDataBlock &data_block, RowDataBlock &heap_block) {
|
159
186
|
// Pin the data block and swizzle the pointers within the rows
|
160
187
|
D_ASSERT(!data_block.block->IsSwizzled());
|
@@ -190,7 +217,7 @@ void RowDataCollectionScanner::ReSwizzle() {
|
|
190
217
|
}
|
191
218
|
|
192
219
|
void RowDataCollectionScanner::ValidateUnscannedBlock() const {
|
193
|
-
if (unswizzling && read_state.block_idx < rows.blocks.size()) {
|
220
|
+
if (unswizzling && read_state.block_idx < rows.blocks.size() && Remaining()) {
|
194
221
|
D_ASSERT(rows.blocks[read_state.block_idx]->block->IsSwizzled());
|
195
222
|
}
|
196
223
|
}
|
@@ -202,6 +229,9 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
|
|
202
229
|
return;
|
203
230
|
}
|
204
231
|
|
232
|
+
// Only flush blocks we processed.
|
233
|
+
const auto flush_block_idx = read_state.block_idx;
|
234
|
+
|
205
235
|
const idx_t &row_width = layout.GetRowWidth();
|
206
236
|
// Set up a batch of pointers to scan data from
|
207
237
|
idx_t scanned = 0;
|
@@ -227,6 +257,8 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
|
|
227
257
|
}
|
228
258
|
// Update state indices
|
229
259
|
read_state.entry_idx += next;
|
260
|
+
scanned += next;
|
261
|
+
total_scanned += next;
|
230
262
|
if (read_state.entry_idx == data_block->count) {
|
231
263
|
// Pin completed blocks so we don't lose them
|
232
264
|
pinned_blocks.emplace_back(rows.buffer_manager.Pin(data_block->block));
|
@@ -238,7 +270,6 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
|
|
238
270
|
read_state.entry_idx = 0;
|
239
271
|
ValidateUnscannedBlock();
|
240
272
|
}
|
241
|
-
scanned += next;
|
242
273
|
}
|
243
274
|
D_ASSERT(scanned == count);
|
244
275
|
// Deserialize the payload data
|
@@ -248,14 +279,13 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
|
|
248
279
|
}
|
249
280
|
chunk.SetCardinality(count);
|
250
281
|
chunk.Verify();
|
251
|
-
total_scanned += scanned;
|
252
282
|
|
253
283
|
// Switch to a new set of pinned blocks
|
254
284
|
read_state.pinned_blocks.swap(pinned_blocks);
|
255
285
|
|
256
286
|
if (flush) {
|
257
287
|
// Release blocks we have passed.
|
258
|
-
for (idx_t i =
|
288
|
+
for (idx_t i = flush_block_idx; i < read_state.block_idx; ++i) {
|
259
289
|
rows.blocks[i]->block = nullptr;
|
260
290
|
if (unswizzling) {
|
261
291
|
heap.blocks[i]->block = nullptr;
|
@@ -263,7 +293,7 @@ void RowDataCollectionScanner::Scan(DataChunk &chunk) {
|
|
263
293
|
}
|
264
294
|
} else if (unswizzling) {
|
265
295
|
// Reswizzle blocks we have passed so they can be flushed safely.
|
266
|
-
for (idx_t i =
|
296
|
+
for (idx_t i = flush_block_idx; i < read_state.block_idx; ++i) {
|
267
297
|
auto &data_block = rows.blocks[i];
|
268
298
|
if (data_block->block && !data_block->block->IsSwizzled()) {
|
269
299
|
SwizzleBlock(*data_block, *heap.blocks[i]);
|