duckdb 0.7.2-dev2995.0 → 0.7.2-dev3117.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
- package/src/duckdb/extension/json/include/json_serializer.hpp +8 -1
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +1 -3
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -3
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +3 -2
- package/src/duckdb/extension/parquet/parquet-extension.cpp +9 -7
- package/src/duckdb/src/common/enum_util.cpp +5908 -0
- package/src/duckdb/src/common/enums/expression_type.cpp +216 -4
- package/src/duckdb/src/common/enums/join_type.cpp +6 -5
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/exception.cpp +1 -1
- package/src/duckdb/src/common/exception_format_value.cpp +2 -2
- package/src/duckdb/src/common/multi_file_reader.cpp +14 -0
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +143 -0
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +160 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +3 -3
- package/src/duckdb/src/common/types.cpp +11 -10
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +4 -4
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +2 -1
- package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +2 -3
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +2 -2
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -2
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +2 -1
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -1
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +165 -0
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +9 -7
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -1
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +5 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +85 -29
- package/src/duckdb/src/function/table/read_csv.cpp +17 -11
- package/src/duckdb/src/function/table/system/duckdb_settings.cpp +2 -1
- package/src/duckdb/src/function/table/system/duckdb_types.cpp +2 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +958 -0
- package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +44 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +93 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +92 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/vector.hpp +61 -14
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +68 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +32 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/database.hpp +1 -3
- package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +24 -0
- package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +1 -1
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +26 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/main/client_context.cpp +1 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/database.cpp +11 -23
- package/src/duckdb/src/main/database_path_and_type.cpp +23 -0
- package/src/duckdb/src/main/relation/join_relation.cpp +2 -1
- package/src/duckdb/src/main/relation/setop_relation.cpp +2 -3
- package/src/duckdb/src/parser/expression/window_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +2 -2
- package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
- package/src/duckdb/src/parser/result_modifier.cpp +2 -2
- package/src/duckdb/src/parser/statement/select_statement.cpp +0 -44
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -3
- package/src/duckdb/src/parser/tableref.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +6 -0
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +4 -1
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +7 -0
- package/src/duckdb/src/planner/operator/logical_aggregate.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +20 -0
- package/src/duckdb/src/verification/statement_verifier.cpp +3 -0
- package/src/duckdb/ub_src_common.cpp +2 -2
- package/src/duckdb/ub_src_common_serializer.cpp +4 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_main.cpp +2 -0
- package/src/duckdb/src/common/serializer/enum_serializer.cpp +0 -1180
- package/src/duckdb/src/common/vector.cpp +0 -12
- package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +0 -113
@@ -28,7 +28,7 @@
|
|
28
28
|
#include "duckdb/parser/parser.hpp"
|
29
29
|
|
30
30
|
#include "duckdb/common/serializer/format_deserializer.hpp"
|
31
|
-
#include "duckdb/common/
|
31
|
+
#include "duckdb/common/enum_util.hpp"
|
32
32
|
#include "duckdb/common/serializer/format_serializer.hpp"
|
33
33
|
|
34
34
|
#include <cmath>
|
@@ -158,6 +158,11 @@ PhysicalType LogicalType::GetInternalType() {
|
|
158
158
|
}
|
159
159
|
}
|
160
160
|
|
161
|
+
// **DEPRECATED**: Use EnumUtil directly instead.
|
162
|
+
string LogicalTypeIdToString(LogicalTypeId type) {
|
163
|
+
return EnumUtil::ToString(type);
|
164
|
+
}
|
165
|
+
|
161
166
|
constexpr const LogicalTypeId LogicalType::INVALID;
|
162
167
|
constexpr const LogicalTypeId LogicalType::SQLNULL;
|
163
168
|
constexpr const LogicalTypeId LogicalType::BOOLEAN;
|
@@ -331,10 +336,6 @@ bool TypeIsInteger(PhysicalType type) {
|
|
331
336
|
return (type >= PhysicalType::UINT8 && type <= PhysicalType::INT64) || type == PhysicalType::INT128;
|
332
337
|
}
|
333
338
|
|
334
|
-
string LogicalTypeIdToString(LogicalTypeId id) {
|
335
|
-
return EnumSerializer::EnumToString(id);
|
336
|
-
}
|
337
|
-
|
338
339
|
string LogicalType::ToString() const {
|
339
340
|
auto alias = GetAlias();
|
340
341
|
if (!alias.empty()) {
|
@@ -406,7 +407,7 @@ string LogicalType::ToString() const {
|
|
406
407
|
return AggregateStateType::GetTypeName(*this);
|
407
408
|
}
|
408
409
|
default:
|
409
|
-
return
|
410
|
+
return EnumUtil::ToString(id_);
|
410
411
|
}
|
411
412
|
}
|
412
413
|
// LCOV_EXCL_STOP
|
@@ -1566,17 +1567,18 @@ void ExtraTypeInfo::Serialize(ExtraTypeInfo *info, FieldWriter &writer) {
|
|
1566
1567
|
}
|
1567
1568
|
void ExtraTypeInfo::FormatSerialize(FormatSerializer &serializer) const {
|
1568
1569
|
serializer.WriteProperty("type", type);
|
1570
|
+
// BREAKING: we used to write the alias last if there was additional type info, but now we write it second.
|
1569
1571
|
serializer.WriteProperty("alias", alias);
|
1570
1572
|
}
|
1571
1573
|
|
1572
1574
|
shared_ptr<ExtraTypeInfo> ExtraTypeInfo::FormatDeserialize(FormatDeserializer &deserializer) {
|
1573
1575
|
auto type = deserializer.ReadProperty<ExtraTypeInfoType>("type");
|
1576
|
+
auto alias = deserializer.ReadProperty<string>("alias");
|
1577
|
+
// BREAKING: we used to read the alias last, but now we read it second.
|
1574
1578
|
|
1575
1579
|
shared_ptr<ExtraTypeInfo> result;
|
1576
1580
|
switch (type) {
|
1577
1581
|
case ExtraTypeInfoType::INVALID_TYPE_INFO: {
|
1578
|
-
string alias;
|
1579
|
-
deserializer.ReadOptionalProperty("alias", alias);
|
1580
1582
|
if (!alias.empty()) {
|
1581
1583
|
return make_shared<ExtraTypeInfo>(type, alias);
|
1582
1584
|
}
|
@@ -1620,11 +1622,10 @@ shared_ptr<ExtraTypeInfo> ExtraTypeInfo::FormatDeserialize(FormatDeserializer &d
|
|
1620
1622
|
case ExtraTypeInfoType::AGGREGATE_STATE_TYPE_INFO:
|
1621
1623
|
result = AggregateStateTypeInfo::FormatDeserialize(deserializer);
|
1622
1624
|
break;
|
1623
|
-
|
1624
1625
|
default:
|
1625
1626
|
throw InternalException("Unimplemented type info in ExtraTypeInfo::Deserialize");
|
1626
1627
|
}
|
1627
|
-
|
1628
|
+
result->alias = alias;
|
1628
1629
|
return result;
|
1629
1630
|
}
|
1630
1631
|
|
@@ -900,15 +900,15 @@ idx_t VectorOperations::DistinctLessThan(Vector &left, Vector &right, const Sele
|
|
900
900
|
// true := A < B with nulls being minimal
|
901
901
|
idx_t VectorOperations::DistinctLessThanNullsFirst(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
|
902
902
|
SelectionVector *true_sel, SelectionVector *false_sel) {
|
903
|
-
return TemplatedDistinctSelectOperation<duckdb::
|
904
|
-
|
903
|
+
return TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThanNullsFirst, duckdb::DistinctGreaterThan>(
|
904
|
+
right, left, sel, count, true_sel, false_sel);
|
905
905
|
}
|
906
906
|
|
907
907
|
// true := A <= B with nulls being maximal
|
908
908
|
idx_t VectorOperations::DistinctLessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
|
909
909
|
SelectionVector *true_sel, SelectionVector *false_sel) {
|
910
|
-
return
|
911
|
-
|
910
|
+
return count -
|
911
|
+
TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThan>(left, right, sel, count, false_sel, true_sel);
|
912
912
|
}
|
913
913
|
|
914
914
|
// true := A != B with nulls being equal, inputs selected
|
@@ -3,6 +3,7 @@
|
|
3
3
|
#include "duckdb/common/enums/date_part_specifier.hpp"
|
4
4
|
#include "duckdb/common/exception.hpp"
|
5
5
|
#include "duckdb/common/string_util.hpp"
|
6
|
+
#include "duckdb/common/enum_util.hpp"
|
6
7
|
#include "duckdb/common/types/date.hpp"
|
7
8
|
#include "duckdb/common/types/timestamp.hpp"
|
8
9
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
@@ -81,7 +82,7 @@ DatePartSpecifier GetDateTypePartSpecifier(const string &specifier, LogicalType
|
|
81
82
|
break;
|
82
83
|
}
|
83
84
|
|
84
|
-
throw NotImplementedException("\"%s\" units \"%s\" not recognized",
|
85
|
+
throw NotImplementedException("\"%s\" units \"%s\" not recognized", EnumUtil::ToString(type.id()), specifier);
|
85
86
|
}
|
86
87
|
|
87
88
|
template <int64_t MIN, int64_t MAX>
|
@@ -1,11 +1,10 @@
|
|
1
1
|
#include "duckdb/core_functions/scalar/list_functions.hpp"
|
2
|
-
#include "duckdb/common/
|
2
|
+
#include "duckdb/common/enum_util.hpp"
|
3
3
|
#include "duckdb/common/types/chunk_collection.hpp"
|
4
4
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
5
5
|
#include "duckdb/execution/expression_executor.hpp"
|
6
6
|
#include "duckdb/planner/expression/bound_reference_expression.hpp"
|
7
7
|
#include "duckdb/main/config.hpp"
|
8
|
-
|
9
8
|
#include "duckdb/common/sort/sort.hpp"
|
10
9
|
|
11
10
|
namespace duckdb {
|
@@ -251,7 +250,7 @@ static T GetOrder(ClientContext &context, Expression &expr) {
|
|
251
250
|
}
|
252
251
|
Value order_value = ExpressionExecutor::EvaluateScalar(context, expr);
|
253
252
|
auto order_name = StringUtil::Upper(order_value.ToString());
|
254
|
-
return
|
253
|
+
return EnumUtil::FromString<T>(order_name.c_str());
|
255
254
|
}
|
256
255
|
|
257
256
|
static unique_ptr<FunctionData> ListNormalSortBind(ClientContext &context, ScalarFunction &bound_function,
|
@@ -236,7 +236,7 @@ void GroupedAggregateHashTable::Resize(idx_t size) {
|
|
236
236
|
|
237
237
|
idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
|
238
238
|
AggregateType filter) {
|
239
|
-
|
239
|
+
unsafe_vector<idx_t> aggregate_filter;
|
240
240
|
|
241
241
|
auto &aggregates = layout.GetAggregates();
|
242
242
|
for (idx_t i = 0; i < aggregates.size(); i++) {
|
@@ -249,7 +249,7 @@ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChu
|
|
249
249
|
}
|
250
250
|
|
251
251
|
idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
|
252
|
-
const
|
252
|
+
const unsafe_vector<idx_t> &filter) {
|
253
253
|
Vector hashes(LogicalType::HASH);
|
254
254
|
groups.Hash(hashes);
|
255
255
|
|
@@ -257,7 +257,7 @@ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChu
|
|
257
257
|
}
|
258
258
|
|
259
259
|
idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
|
260
|
-
DataChunk &payload, const
|
260
|
+
DataChunk &payload, const unsafe_vector<idx_t> &filter) {
|
261
261
|
D_ASSERT(!is_finalized);
|
262
262
|
if (groups.size() == 0) {
|
263
263
|
return 0;
|
@@ -171,7 +171,7 @@ bool DistinctAggregateCollectionInfo::AnyDistinct() const {
|
|
171
171
|
return !indices.empty();
|
172
172
|
}
|
173
173
|
|
174
|
-
const
|
174
|
+
const unsafe_vector<idx_t> &DistinctAggregateCollectionInfo::Indices() const {
|
175
175
|
return this->indices;
|
176
176
|
}
|
177
177
|
|
@@ -12,7 +12,7 @@ const vector<vector<idx_t>> &GroupedAggregateData::GetGroupingFunctions() const
|
|
12
12
|
|
13
13
|
void GroupedAggregateData::InitializeGroupby(vector<unique_ptr<Expression>> groups,
|
14
14
|
vector<unique_ptr<Expression>> expressions,
|
15
|
-
vector<
|
15
|
+
vector<unsafe_vector<idx_t>> grouping_functions) {
|
16
16
|
InitializeGroupbyGroups(std::move(groups));
|
17
17
|
vector<LogicalType> payload_types_filters;
|
18
18
|
|
@@ -86,7 +86,7 @@ void GroupedAggregateData::InitializeGroupbyGroups(vector<unique_ptr<Expression>
|
|
86
86
|
this->groups = std::move(groups);
|
87
87
|
}
|
88
88
|
|
89
|
-
void GroupedAggregateData::SetGroupingFunctions(vector<
|
89
|
+
void GroupedAggregateData::SetGroupingFunctions(vector<unsafe_vector<idx_t>> &functions) {
|
90
90
|
grouping_functions.reserve(functions.size());
|
91
91
|
for (idx_t i = 0; i < functions.size(); i++) {
|
92
92
|
grouping_functions.push_back(std::move(functions[i]));
|
@@ -117,7 +117,8 @@ PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<Logi
|
|
117
117
|
vector<unique_ptr<Expression>> expressions,
|
118
118
|
vector<unique_ptr<Expression>> groups_p,
|
119
119
|
vector<GroupingSet> grouping_sets_p,
|
120
|
-
vector<
|
120
|
+
vector<unsafe_vector<idx_t>> grouping_functions_p,
|
121
|
+
idx_t estimated_cardinality)
|
121
122
|
: PhysicalOperator(PhysicalOperatorType::HASH_GROUP_BY, std::move(types), estimated_cardinality),
|
122
123
|
grouping_sets(std::move(grouping_sets_p)) {
|
123
124
|
// get a list of all aggregates to be computed
|
@@ -266,7 +267,7 @@ void PhysicalHashAggregate::SinkDistinctGrouping(ExecutionContext &context, Data
|
|
266
267
|
DataChunk empty_chunk;
|
267
268
|
|
268
269
|
// Create an empty filter for Sink, since we don't need to update any aggregate states here
|
269
|
-
|
270
|
+
unsafe_vector<idx_t> empty_filter;
|
270
271
|
|
271
272
|
for (idx_t &idx : distinct_info.indices) {
|
272
273
|
auto &aggregate = grouped_aggregate_data.aggregates[idx]->Cast<BoundAggregateExpression>();
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "duckdb/execution/operator/helper/physical_streaming_sample.hpp"
|
2
2
|
#include "duckdb/common/random_engine.hpp"
|
3
3
|
#include "duckdb/common/to_string.hpp"
|
4
|
+
#include "duckdb/common/enum_util.hpp"
|
4
5
|
|
5
6
|
namespace duckdb {
|
6
7
|
|
@@ -68,7 +69,7 @@ OperatorResultType PhysicalStreamingSample::Execute(ExecutionContext &context, D
|
|
68
69
|
}
|
69
70
|
|
70
71
|
string PhysicalStreamingSample::ParamsToString() const {
|
71
|
-
return
|
72
|
+
return EnumUtil::ToString(method) + ": " + to_string(100 * percentage) + "%";
|
72
73
|
}
|
73
74
|
|
74
75
|
} // namespace duckdb
|
@@ -6,6 +6,7 @@
|
|
6
6
|
#include "duckdb/execution/operator/join/outer_join_marker.hpp"
|
7
7
|
#include "duckdb/execution/operator/join/physical_comparison_join.hpp"
|
8
8
|
#include "duckdb/execution/operator/join/physical_cross_product.hpp"
|
9
|
+
#include "duckdb/common/enum_util.hpp"
|
9
10
|
|
10
11
|
namespace duckdb {
|
11
12
|
|
@@ -200,7 +201,7 @@ OperatorResultType PhysicalBlockwiseNLJoin::ExecuteInternal(ExecutionContext &co
|
|
200
201
|
}
|
201
202
|
|
202
203
|
string PhysicalBlockwiseNLJoin::ParamsToString() const {
|
203
|
-
string extra_info =
|
204
|
+
string extra_info = EnumUtil::ToString(join_type) + "\n";
|
204
205
|
extra_info += condition->GetName();
|
205
206
|
return extra_info;
|
206
207
|
}
|
@@ -1,5 +1,6 @@
|
|
1
1
|
#include "duckdb/execution/operator/join/physical_comparison_join.hpp"
|
2
2
|
#include "duckdb/common/types/chunk_collection.hpp"
|
3
|
+
#include "duckdb/common/enum_util.hpp"
|
3
4
|
|
4
5
|
namespace duckdb {
|
5
6
|
|
@@ -24,7 +25,7 @@ PhysicalComparisonJoin::PhysicalComparisonJoin(LogicalOperator &op, PhysicalOper
|
|
24
25
|
}
|
25
26
|
|
26
27
|
string PhysicalComparisonJoin::ParamsToString() const {
|
27
|
-
string extra_info =
|
28
|
+
string extra_info = EnumUtil::ToString(join_type) + "\n";
|
28
29
|
for (auto &it : conditions) {
|
29
30
|
string op = ExpressionTypeToOperator(it.comparison);
|
30
31
|
extra_info += it.left->GetName() + " " + op + " " + it.right->GetName() + "\n";
|
@@ -0,0 +1,165 @@
|
|
1
|
+
#include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
|
2
|
+
#include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
|
3
|
+
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
4
|
+
#include "duckdb/common/types/batched_data_collection.hpp"
|
5
|
+
#include "duckdb/common/file_system.hpp"
|
6
|
+
#include "duckdb/common/file_opener.hpp"
|
7
|
+
#include "duckdb/common/allocator.hpp"
|
8
|
+
#include <algorithm>
|
9
|
+
|
10
|
+
namespace duckdb {
|
11
|
+
|
12
|
+
// Builds the batch copy operator: registers it as a BATCH_COPY_TO_FILE physical operator and
// takes ownership of the copy function and its bind data.
// Throws an InternalException when the copy function lacks either of the two batch callbacks
// (prepare_batch / flush_batch) that this operator invokes.
PhysicalBatchCopyToFile::PhysicalBatchCopyToFile(vector<LogicalType> types, CopyFunction function_p,
                                                 unique_ptr<FunctionData> bind_data, idx_t estimated_cardinality)
    : PhysicalOperator(PhysicalOperatorType::BATCH_COPY_TO_FILE, std::move(types), estimated_cardinality),
      function(std::move(function_p)), bind_data(std::move(bind_data)) {
	// note: "function" below is the member that was just initialized from function_p
	const bool has_batch_callbacks = function.flush_batch && function.prepare_batch;
	if (has_batch_callbacks) {
		return;
	}
	throw InternalException(
	    "PhysicalBatchCopyToFile created for copy function that does not have prepare_batch/flush_batch defined");
}
|
21
|
+
|
22
|
+
//===--------------------------------------------------------------------===//
|
23
|
+
// Sink
|
24
|
+
//===--------------------------------------------------------------------===//
|
25
|
+
class BatchCopyToGlobalState : public GlobalSinkState {
|
26
|
+
public:
|
27
|
+
explicit BatchCopyToGlobalState(unique_ptr<GlobalFunctionData> global_state)
|
28
|
+
: rows_copied(0), global_state(std::move(global_state)) {
|
29
|
+
}
|
30
|
+
|
31
|
+
mutex lock;
|
32
|
+
mutex flush_lock;
|
33
|
+
atomic<idx_t> rows_copied;
|
34
|
+
unique_ptr<GlobalFunctionData> global_state;
|
35
|
+
map<idx_t, unique_ptr<PreparedBatchData>> batch_data;
|
36
|
+
};
|
37
|
+
|
38
|
+
class BatchCopyToLocalState : public LocalSinkState {
|
39
|
+
public:
|
40
|
+
explicit BatchCopyToLocalState(unique_ptr<LocalFunctionData> local_state_p)
|
41
|
+
: local_state(std::move(local_state_p)), rows_copied(0), batch_index(0) {
|
42
|
+
}
|
43
|
+
|
44
|
+
unique_ptr<LocalFunctionData> local_state;
|
45
|
+
unique_ptr<ColumnDataCollection> collection;
|
46
|
+
ColumnDataAppendState append_state;
|
47
|
+
idx_t rows_copied;
|
48
|
+
idx_t batch_index;
|
49
|
+
|
50
|
+
void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
|
51
|
+
collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), op.children[0]->types);
|
52
|
+
collection->InitializeAppend(append_state);
|
53
|
+
}
|
54
|
+
};
|
55
|
+
|
56
|
+
// Buffers the incoming chunk in the thread-local collection and counts its rows.
// The buffer is created lazily on the first chunk; nothing is written to the file here.
// Always asks for more input.
SinkResultType PhysicalBatchCopyToFile::Sink(ExecutionContext &context, DataChunk &chunk,
                                             OperatorSinkInput &input) const {
	auto &local = input.local_state.Cast<BatchCopyToLocalState>();
	const bool needs_buffer = !local.collection;
	if (needs_buffer) {
		local.InitializeCollection(context.client, *this);
	}
	// the row counter is bumped before the append, matching the original statement order
	local.rows_copied += chunk.size();
	local.collection->Append(local.append_state, chunk);
	return SinkResultType::NEED_MORE_INPUT;
}
|
66
|
+
|
67
|
+
// Folds a finished thread's row count into the global total.
// No batch data is prepared or flushed here.
void PhysicalBatchCopyToFile::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
                                      LocalSinkState &lstate) const {
	auto &local = lstate.Cast<BatchCopyToLocalState>();
	auto &global = gstate_p.Cast<BatchCopyToGlobalState>();
	const idx_t thread_rows = local.rows_copied;
	global.rows_copied += thread_rows;
}
|
73
|
+
|
74
|
+
// Flushes every batch that is still pending, then lets the copy function finalize its output.
// The temporary file is moved into place only when the copy function defines copy_to_finalize
// and use_tmp_file is set.
SinkFinalizeType PhysicalBatchCopyToFile::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
                                                   GlobalSinkState &gstate_p) const {
	auto &global = gstate_p.Cast<BatchCopyToGlobalState>();
	// use the largest int64 value as the bound so that no remaining batch index is held back
	FlushBatchData(context, gstate_p, NumericLimits<int64_t>::Maximum());

	if (!function.copy_to_finalize) {
		// no finalize callback - nothing else to do
		return SinkFinalizeType::READY;
	}
	function.copy_to_finalize(context, *bind_data, *global.global_state);
	if (use_tmp_file) {
		PhysicalCopyToFile::MoveTmpFile(context, file_path);
	}
	return SinkFinalizeType::READY;
}
|
87
|
+
|
88
|
+
// Converts a buffered collection into prepared batch data via the copy function's prepare_batch
// callback and registers the result under its batch index.
// The prepare step runs without any lock held; only the insertion into the shared map is guarded.
void PhysicalBatchCopyToFile::PrepareBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t batch_index,
                                               unique_ptr<ColumnDataCollection> collection) const {
	auto &global = gstate_p.Cast<BatchCopyToGlobalState>();

	// run the preparation outside of the lock
	auto prepared = function.prepare_batch(context, *bind_data, *global.global_state, std::move(collection));

	// publish the prepared batch so that it can be picked up for flushing
	lock_guard<mutex> guard(global.lock);
	global.batch_data[batch_index] = std::move(prepared);
}
|
98
|
+
|
99
|
+
// Writes out prepared batches in ascending batch-index order.
// Repeatedly takes the batch with the smallest index and hands it to flush_batch, stopping when
// nothing is pending, when the smallest pending index is >= min_index, or when that entry holds
// no prepared data.
void PhysicalBatchCopyToFile::FlushBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t min_index) const {
	auto &global = gstate_p.Cast<BatchCopyToGlobalState>();

	for (;;) {
		// flush_batch may only be called while holding the flush lock,
		// otherwise the data might end up in the wrong order
		lock_guard<mutex> flush_guard(global.flush_lock);
		unique_ptr<PreparedBatchData> next_batch;
		{
			// look up the next batch to flush (if any) under the state lock
			lock_guard<mutex> state_guard(global.lock);
			auto &pending = global.batch_data;
			if (pending.empty()) {
				// nothing left to flush
				break;
			}
			auto first = pending.begin();
			// either the batch lies past min_index and cannot be written yet,
			// or it is registered but its prepared data is not available yet
			const bool past_min_index = first->first >= min_index;
			if (past_min_index || !first->second) {
				break;
			}
			next_batch = std::move(first->second);
			pending.erase(first);
		}
		// the state lock is released here; the flush lock is still held
		function.flush_batch(context, *bind_data, *global.global_state, *next_batch);
	}
}
|
130
|
+
|
131
|
+
// Switches the local state over to the batch index currently recorded in its partition info.
// If rows were buffered for the previous batch, that batch is prepared and all batches below
// the current minimum batch index are flushed first. Afterwards a fresh buffer is set up.
void PhysicalBatchCopyToFile::NextBatch(ExecutionContext &context, GlobalSinkState &gstate_p,
                                        LocalSinkState &lstate) const {
	auto &local = lstate.Cast<BatchCopyToLocalState>();
	auto &client = context.client;
	auto &partition = lstate.partition_info;

	if (local.collection) {
		// the previous batch is complete: hand it over and start flushing
		PrepareBatchData(client, gstate_p, local.batch_index, std::move(local.collection));
		FlushBatchData(client, gstate_p, partition.min_batch_index.GetIndex());
	}
	local.batch_index = partition.batch_index.GetIndex();

	local.InitializeCollection(client, *this);
}
|
144
|
+
|
145
|
+
// Creates the per-thread sink state, wrapping the local state produced by the copy function's
// copy_to_initialize_local callback.
unique_ptr<LocalSinkState> PhysicalBatchCopyToFile::GetLocalSinkState(ExecutionContext &context) const {
	auto function_local_state = function.copy_to_initialize_local(context, *bind_data);
	return make_uniq<BatchCopyToLocalState>(std::move(function_local_state));
}
|
148
|
+
|
149
|
+
// Creates the shared sink state, wrapping the global state produced by the copy function's
// copy_to_initialize_global callback for the operator's file_path.
unique_ptr<GlobalSinkState> PhysicalBatchCopyToFile::GetGlobalSinkState(ClientContext &context) const {
	auto function_global_state = function.copy_to_initialize_global(context, *bind_data, file_path);
	return make_uniq<BatchCopyToGlobalState>(std::move(function_global_state));
}
|
152
|
+
|
153
|
+
//===--------------------------------------------------------------------===//
|
154
|
+
// Source
|
155
|
+
//===--------------------------------------------------------------------===//
|
156
|
+
// Produces the operator's result: a single row whose first column is the total number of rows
// copied (as a BIGINT), read from the global sink state. Finishes immediately afterwards.
SourceResultType PhysicalBatchCopyToFile::GetData(ExecutionContext &context, DataChunk &chunk,
                                                  OperatorSourceInput &input) const {
	auto &global = sink_state->Cast<BatchCopyToGlobalState>();

	chunk.SetCardinality(1);
	const auto total_rows = Value::BIGINT(global.rows_copied);
	chunk.SetValue(0, 0, total_rows);
	return SourceResultType::FINISHED;
}
|
164
|
+
|
165
|
+
} // namespace duckdb
|
@@ -43,7 +43,7 @@ public:
|
|
43
43
|
// Sink
|
44
44
|
//===--------------------------------------------------------------------===//
|
45
45
|
|
46
|
-
void MoveTmpFile(ClientContext &context, const string &tmp_file_path) {
|
46
|
+
void PhysicalCopyToFile::MoveTmpFile(ClientContext &context, const string &tmp_file_path) {
|
47
47
|
auto &fs = FileSystem::GetFileSystem(context);
|
48
48
|
auto file_path = tmp_file_path.substr(0, tmp_file_path.length() - 4);
|
49
49
|
if (fs.FileExists(file_path)) {
|
@@ -1,11 +1,13 @@
|
|
1
1
|
#include "duckdb/execution/operator/schema/physical_attach.hpp"
|
2
|
-
|
2
|
+
|
3
3
|
#include "duckdb/catalog/catalog.hpp"
|
4
|
-
#include "duckdb/main/database_manager.hpp"
|
5
4
|
#include "duckdb/main/attached_database.hpp"
|
6
5
|
#include "duckdb/main/database.hpp"
|
7
|
-
#include "duckdb/
|
6
|
+
#include "duckdb/main/database_manager.hpp"
|
7
|
+
#include "duckdb/main/database_path_and_type.hpp"
|
8
8
|
#include "duckdb/main/extension_helper.hpp"
|
9
|
+
#include "duckdb/parser/parsed_data/attach_info.hpp"
|
10
|
+
#include "duckdb/storage/storage_extension.hpp"
|
9
11
|
|
10
12
|
namespace duckdb {
|
11
13
|
|
@@ -44,11 +46,11 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c
|
|
44
46
|
auto &db = DatabaseInstance::GetDatabase(context.client);
|
45
47
|
if (type.empty()) {
|
46
48
|
// try to extract type from path
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
type = ExtensionHelper::ApplyExtensionAlias(type);
|
49
|
+
auto path_and_type = DBPathAndType::Parse(info->path, config);
|
50
|
+
type = path_and_type.type;
|
51
|
+
info->path = path_and_type.path;
|
51
52
|
}
|
53
|
+
|
52
54
|
if (type.empty() && !unrecognized_option.empty()) {
|
53
55
|
throw BinderException("Unrecognized option for attach \"%s\"", unrecognized_option);
|
54
56
|
}
|
@@ -48,7 +48,7 @@ HtEntryType PartitionableHashTable::GetHTEntrySize() {
|
|
48
48
|
}
|
49
49
|
|
50
50
|
idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes,
|
51
|
-
DataChunk &payload, const
|
51
|
+
DataChunk &payload, const unsafe_vector<idx_t> &filter) {
|
52
52
|
// If this is false, a single AddChunk would overflow the max capacity
|
53
53
|
D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
|
54
54
|
if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity()) {
|
@@ -65,7 +65,7 @@ idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &group
|
|
65
65
|
}
|
66
66
|
|
67
67
|
idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition,
|
68
|
-
const
|
68
|
+
const unsafe_vector<idx_t> &filter) {
|
69
69
|
groups.Hash(hashes);
|
70
70
|
|
71
71
|
// we partition when we are asked to or when the unpartitioned ht runs out of space
|
@@ -1,17 +1,40 @@
|
|
1
1
|
#include "duckdb/execution/physical_plan_generator.hpp"
|
2
2
|
#include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
|
3
|
+
#include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
|
3
4
|
#include "duckdb/planner/operator/logical_copy_to_file.hpp"
|
4
5
|
|
5
6
|
namespace duckdb {
|
6
7
|
|
7
8
|
unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile &op) {
|
8
9
|
auto plan = CreatePlan(*op.children[0]);
|
10
|
+
bool preserve_insertion_order = PhysicalPlanGenerator::PreserveInsertionOrder(context, *plan);
|
11
|
+
bool supports_batch_index = PhysicalPlanGenerator::UseBatchIndex(context, *plan);
|
9
12
|
auto &fs = FileSystem::GetFileSystem(context);
|
10
13
|
op.file_path = fs.ExpandPath(op.file_path, FileSystem::GetFileOpener(context));
|
11
|
-
|
12
14
|
if (op.use_tmp_file) {
|
13
15
|
op.file_path += ".tmp";
|
14
16
|
}
|
17
|
+
if (op.per_thread_output || op.partition_output || !op.partition_columns.empty() || op.overwrite_or_ignore) {
|
18
|
+
// hive-partitioning/per-thread output does not care about insertion order, and does not support batch indexes
|
19
|
+
preserve_insertion_order = false;
|
20
|
+
supports_batch_index = false;
|
21
|
+
}
|
22
|
+
auto mode = CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE;
|
23
|
+
if (op.function.execution_mode) {
|
24
|
+
mode = op.function.execution_mode(preserve_insertion_order, supports_batch_index);
|
25
|
+
}
|
26
|
+
if (mode == CopyFunctionExecutionMode::BATCH_COPY_TO_FILE) {
|
27
|
+
if (!supports_batch_index) {
|
28
|
+
throw InternalException("BATCH_COPY_TO_FILE can only be used if batch indexes are supported");
|
29
|
+
}
|
30
|
+
// batched copy to file
|
31
|
+
auto copy = make_uniq<PhysicalBatchCopyToFile>(op.types, op.function, std::move(op.bind_data),
|
32
|
+
op.estimated_cardinality);
|
33
|
+
copy->file_path = op.file_path;
|
34
|
+
copy->use_tmp_file = op.use_tmp_file;
|
35
|
+
copy->children.push_back(std::move(plan));
|
36
|
+
return std::move(copy);
|
37
|
+
}
|
15
38
|
// COPY from select statement to file
|
16
39
|
auto copy = make_uniq<PhysicalCopyToFile>(op.types, op.function, std::move(op.bind_data), op.estimated_cardinality);
|
17
40
|
copy->file_path = op.file_path;
|
@@ -23,9 +46,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile
|
|
23
46
|
copy->partition_columns = op.partition_columns;
|
24
47
|
copy->names = op.names;
|
25
48
|
copy->expected_types = op.expected_types;
|
26
|
-
|
27
|
-
copy->parallel = op.function.parallel(context, *copy->bind_data);
|
28
|
-
}
|
49
|
+
copy->parallel = mode == CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE;
|
29
50
|
|
30
51
|
copy->children.push_back(std::move(plan));
|
31
52
|
return std::move(copy);
|
@@ -2,6 +2,7 @@
|
|
2
2
|
#include "duckdb/execution/operator/helper/physical_streaming_sample.hpp"
|
3
3
|
#include "duckdb/execution/physical_plan_generator.hpp"
|
4
4
|
#include "duckdb/planner/operator/logical_sample.hpp"
|
5
|
+
#include "duckdb/common/enum_util.hpp"
|
5
6
|
|
6
7
|
namespace duckdb {
|
7
8
|
|
@@ -20,7 +21,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSample &op
|
|
20
21
|
if (!op.sample_options->is_percentage) {
|
21
22
|
throw ParserException("Sample method %s cannot be used with a discrete sample count, either switch to "
|
22
23
|
"reservoir sampling or use a sample_size",
|
23
|
-
|
24
|
+
EnumUtil::ToString(op.sample_options->method));
|
24
25
|
}
|
25
26
|
sample = make_uniq<PhysicalStreamingSample>(op.types, op.sample_options->method,
|
26
27
|
op.sample_options->sample_size.GetValue<double>(),
|
@@ -131,7 +131,7 @@ void RadixPartitionedHashTable::PopulateGroupChunk(DataChunk &group_chunk, DataC
|
|
131
131
|
}
|
132
132
|
|
133
133
|
void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input,
|
134
|
-
DataChunk &payload_input, const
|
134
|
+
DataChunk &payload_input, const unsafe_vector<idx_t> &filter) const {
|
135
135
|
auto &llstate = input.local_state.Cast<RadixHTLocalState>();
|
136
136
|
auto &gstate = input.global_state.Cast<RadixHTGlobalState>();
|
137
137
|
D_ASSERT(!gstate.is_finalized);
|
@@ -10,6 +10,7 @@
|
|
10
10
|
#include "duckdb/common/types/time.hpp"
|
11
11
|
#include "duckdb/common/types/timestamp.hpp"
|
12
12
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
13
|
+
#include "duckdb/common/enum_util.hpp"
|
13
14
|
#include "duckdb/function/scalar/operators.hpp"
|
14
15
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
15
16
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
@@ -365,8 +366,8 @@ ScalarFunction AddFun::GetFunction(const LogicalType &left_type, const LogicalTy
|
|
365
366
|
break;
|
366
367
|
}
|
367
368
|
// LCOV_EXCL_START
|
368
|
-
throw NotImplementedException("AddFun for types %s, %s",
|
369
|
-
|
369
|
+
throw NotImplementedException("AddFun for types %s, %s", EnumUtil::ToString(left_type.id()),
|
370
|
+
EnumUtil::ToString(right_type.id()));
|
370
371
|
// LCOV_EXCL_STOP
|
371
372
|
}
|
372
373
|
|
@@ -617,8 +618,8 @@ ScalarFunction SubtractFun::GetFunction(const LogicalType &left_type, const Logi
|
|
617
618
|
break;
|
618
619
|
}
|
619
620
|
// LCOV_EXCL_START
|
620
|
-
throw NotImplementedException("SubtractFun for types %s, %s",
|
621
|
-
|
621
|
+
throw NotImplementedException("SubtractFun for types %s, %s", EnumUtil::ToString(left_type.id()),
|
622
|
+
EnumUtil::ToString(right_type.id()));
|
622
623
|
// LCOV_EXCL_STOP
|
623
624
|
}
|
624
625
|
|