duckdb 0.7.2-dev2995.0 → 0.7.2-dev3117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +8 -1
  5. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +1 -3
  6. package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -3
  7. package/src/duckdb/extension/json/json_functions/json_transform.cpp +3 -2
  8. package/src/duckdb/extension/parquet/parquet-extension.cpp +9 -7
  9. package/src/duckdb/src/common/enum_util.cpp +5908 -0
  10. package/src/duckdb/src/common/enums/expression_type.cpp +216 -4
  11. package/src/duckdb/src/common/enums/join_type.cpp +6 -5
  12. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  13. package/src/duckdb/src/common/exception.cpp +1 -1
  14. package/src/duckdb/src/common/exception_format_value.cpp +2 -2
  15. package/src/duckdb/src/common/multi_file_reader.cpp +14 -0
  16. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +143 -0
  17. package/src/duckdb/src/common/serializer/binary_serializer.cpp +160 -0
  18. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +3 -3
  19. package/src/duckdb/src/common/types.cpp +11 -10
  20. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +4 -4
  21. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +2 -1
  22. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +2 -3
  23. package/src/duckdb/src/execution/aggregate_hashtable.cpp +3 -3
  24. package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
  25. package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +2 -2
  26. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -2
  27. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +2 -1
  28. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -1
  29. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +2 -1
  30. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +165 -0
  31. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -1
  32. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +9 -7
  33. package/src/duckdb/src/execution/partitionable_hashtable.cpp +2 -2
  34. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +25 -4
  35. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +2 -1
  36. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -1
  37. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +5 -4
  38. package/src/duckdb/src/function/table/copy_csv.cpp +85 -29
  39. package/src/duckdb/src/function/table/read_csv.cpp +17 -11
  40. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +2 -1
  41. package/src/duckdb/src/function/table/system/duckdb_types.cpp +2 -1
  42. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  43. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +958 -0
  44. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +3 -3
  45. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  46. package/src/duckdb/src/include/duckdb/common/exception.hpp +4 -4
  47. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +3 -2
  48. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +44 -0
  49. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +93 -0
  50. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +92 -0
  51. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +7 -3
  52. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +2 -2
  53. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -1
  54. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
  56. package/src/duckdb/src/include/duckdb/common/vector.hpp +61 -14
  57. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +3 -2
  58. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +2 -2
  59. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +3 -3
  61. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +68 -0
  62. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -0
  63. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -3
  64. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +32 -4
  66. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -2
  67. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  68. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -3
  69. package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +24 -0
  70. package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +1 -0
  71. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +1 -0
  72. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
  75. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +26 -0
  77. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  78. package/src/duckdb/src/main/client_context.cpp +1 -0
  79. package/src/duckdb/src/main/client_verify.cpp +1 -0
  80. package/src/duckdb/src/main/database.cpp +11 -23
  81. package/src/duckdb/src/main/database_path_and_type.cpp +23 -0
  82. package/src/duckdb/src/main/relation/join_relation.cpp +2 -1
  83. package/src/duckdb/src/main/relation/setop_relation.cpp +2 -3
  84. package/src/duckdb/src/parser/expression/window_expression.cpp +1 -1
  85. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +2 -2
  86. package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
  87. package/src/duckdb/src/parser/result_modifier.cpp +2 -2
  88. package/src/duckdb/src/parser/statement/select_statement.cpp +0 -44
  89. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -3
  90. package/src/duckdb/src/parser/tableref.cpp +1 -1
  91. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -3
  92. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +6 -0
  93. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +4 -1
  94. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +7 -0
  95. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +1 -1
  96. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +2 -2
  97. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
  98. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +20 -0
  99. package/src/duckdb/src/verification/statement_verifier.cpp +3 -0
  100. package/src/duckdb/ub_src_common.cpp +2 -2
  101. package/src/duckdb/ub_src_common_serializer.cpp +4 -2
  102. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  103. package/src/duckdb/ub_src_main.cpp +2 -0
  104. package/src/duckdb/src/common/serializer/enum_serializer.cpp +0 -1180
  105. package/src/duckdb/src/common/vector.cpp +0 -12
  106. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +0 -113
@@ -28,7 +28,7 @@
28
28
  #include "duckdb/parser/parser.hpp"
29
29
 
30
30
  #include "duckdb/common/serializer/format_deserializer.hpp"
31
- #include "duckdb/common/serializer/enum_serializer.hpp"
31
+ #include "duckdb/common/enum_util.hpp"
32
32
  #include "duckdb/common/serializer/format_serializer.hpp"
33
33
 
34
34
  #include <cmath>
@@ -158,6 +158,11 @@ PhysicalType LogicalType::GetInternalType() {
158
158
  }
159
159
  }
160
160
 
161
+ // **DEPRECATED**: Use EnumUtil directly instead.
162
+ string LogicalTypeIdToString(LogicalTypeId type) {
163
+ return EnumUtil::ToString(type);
164
+ }
165
+
161
166
  constexpr const LogicalTypeId LogicalType::INVALID;
162
167
  constexpr const LogicalTypeId LogicalType::SQLNULL;
163
168
  constexpr const LogicalTypeId LogicalType::BOOLEAN;
@@ -331,10 +336,6 @@ bool TypeIsInteger(PhysicalType type) {
331
336
  return (type >= PhysicalType::UINT8 && type <= PhysicalType::INT64) || type == PhysicalType::INT128;
332
337
  }
333
338
 
334
- string LogicalTypeIdToString(LogicalTypeId id) {
335
- return EnumSerializer::EnumToString(id);
336
- }
337
-
338
339
  string LogicalType::ToString() const {
339
340
  auto alias = GetAlias();
340
341
  if (!alias.empty()) {
@@ -406,7 +407,7 @@ string LogicalType::ToString() const {
406
407
  return AggregateStateType::GetTypeName(*this);
407
408
  }
408
409
  default:
409
- return LogicalTypeIdToString(id_);
410
+ return EnumUtil::ToString(id_);
410
411
  }
411
412
  }
412
413
  // LCOV_EXCL_STOP
@@ -1566,17 +1567,18 @@ void ExtraTypeInfo::Serialize(ExtraTypeInfo *info, FieldWriter &writer) {
1566
1567
  }
1567
1568
  void ExtraTypeInfo::FormatSerialize(FormatSerializer &serializer) const {
1568
1569
  serializer.WriteProperty("type", type);
1570
+ // BREAKING: we used to write the alias last if there was additional type info, but now we write it second.
1569
1571
  serializer.WriteProperty("alias", alias);
1570
1572
  }
1571
1573
 
1572
1574
  shared_ptr<ExtraTypeInfo> ExtraTypeInfo::FormatDeserialize(FormatDeserializer &deserializer) {
1573
1575
  auto type = deserializer.ReadProperty<ExtraTypeInfoType>("type");
1576
+ auto alias = deserializer.ReadProperty<string>("alias");
1577
+ // BREAKING: we used to read the alias last, but now we read it second.
1574
1578
 
1575
1579
  shared_ptr<ExtraTypeInfo> result;
1576
1580
  switch (type) {
1577
1581
  case ExtraTypeInfoType::INVALID_TYPE_INFO: {
1578
- string alias;
1579
- deserializer.ReadOptionalProperty("alias", alias);
1580
1582
  if (!alias.empty()) {
1581
1583
  return make_shared<ExtraTypeInfo>(type, alias);
1582
1584
  }
@@ -1620,11 +1622,10 @@ shared_ptr<ExtraTypeInfo> ExtraTypeInfo::FormatDeserialize(FormatDeserializer &d
1620
1622
  case ExtraTypeInfoType::AGGREGATE_STATE_TYPE_INFO:
1621
1623
  result = AggregateStateTypeInfo::FormatDeserialize(deserializer);
1622
1624
  break;
1623
-
1624
1625
  default:
1625
1626
  throw InternalException("Unimplemented type info in ExtraTypeInfo::Deserialize");
1626
1627
  }
1627
- deserializer.ReadOptionalPropertyOrDefault("alias", result->alias, string());
1628
+ result->alias = alias;
1628
1629
  return result;
1629
1630
  }
1630
1631
 
@@ -900,15 +900,15 @@ idx_t VectorOperations::DistinctLessThan(Vector &left, Vector &right, const Sele
900
900
  // true := A < B with nulls being minimal
901
901
  idx_t VectorOperations::DistinctLessThanNullsFirst(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
902
902
  SelectionVector *true_sel, SelectionVector *false_sel) {
903
- return TemplatedDistinctSelectOperation<duckdb::DistinctLessThanNullsFirst, duckdb::DistinctLessThan>(
904
- left, right, sel, count, true_sel, false_sel);
903
+ return TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThanNullsFirst, duckdb::DistinctGreaterThan>(
904
+ right, left, sel, count, true_sel, false_sel);
905
905
  }
906
906
 
907
907
  // true := A <= B with nulls being maximal
908
908
  idx_t VectorOperations::DistinctLessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
909
909
  SelectionVector *true_sel, SelectionVector *false_sel) {
910
- return TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThanEquals>(right, left, sel, count, true_sel,
911
- false_sel);
910
+ return count -
911
+ TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThan>(left, right, sel, count, false_sel, true_sel);
912
912
  }
913
913
 
914
914
  // true := A != B with nulls being equal, inputs selected
@@ -3,6 +3,7 @@
3
3
  #include "duckdb/common/enums/date_part_specifier.hpp"
4
4
  #include "duckdb/common/exception.hpp"
5
5
  #include "duckdb/common/string_util.hpp"
6
+ #include "duckdb/common/enum_util.hpp"
6
7
  #include "duckdb/common/types/date.hpp"
7
8
  #include "duckdb/common/types/timestamp.hpp"
8
9
  #include "duckdb/common/vector_operations/vector_operations.hpp"
@@ -81,7 +82,7 @@ DatePartSpecifier GetDateTypePartSpecifier(const string &specifier, LogicalType
81
82
  break;
82
83
  }
83
84
 
84
- throw NotImplementedException("\"%s\" units \"%s\" not recognized", LogicalTypeIdToString(type.id()), specifier);
85
+ throw NotImplementedException("\"%s\" units \"%s\" not recognized", EnumUtil::ToString(type.id()), specifier);
85
86
  }
86
87
 
87
88
  template <int64_t MIN, int64_t MAX>
@@ -1,11 +1,10 @@
1
1
  #include "duckdb/core_functions/scalar/list_functions.hpp"
2
- #include "duckdb/common/serializer/enum_serializer.hpp"
2
+ #include "duckdb/common/enum_util.hpp"
3
3
  #include "duckdb/common/types/chunk_collection.hpp"
4
4
  #include "duckdb/planner/expression/bound_function_expression.hpp"
5
5
  #include "duckdb/execution/expression_executor.hpp"
6
6
  #include "duckdb/planner/expression/bound_reference_expression.hpp"
7
7
  #include "duckdb/main/config.hpp"
8
-
9
8
  #include "duckdb/common/sort/sort.hpp"
10
9
 
11
10
  namespace duckdb {
@@ -251,7 +250,7 @@ static T GetOrder(ClientContext &context, Expression &expr) {
251
250
  }
252
251
  Value order_value = ExpressionExecutor::EvaluateScalar(context, expr);
253
252
  auto order_name = StringUtil::Upper(order_value.ToString());
254
- return EnumSerializer::StringToEnum<T>(order_name.c_str());
253
+ return EnumUtil::FromString<T>(order_name.c_str());
255
254
  }
256
255
 
257
256
  static unique_ptr<FunctionData> ListNormalSortBind(ClientContext &context, ScalarFunction &bound_function,
@@ -236,7 +236,7 @@ void GroupedAggregateHashTable::Resize(idx_t size) {
236
236
 
237
237
  idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
238
238
  AggregateType filter) {
239
- vector<idx_t> aggregate_filter;
239
+ unsafe_vector<idx_t> aggregate_filter;
240
240
 
241
241
  auto &aggregates = layout.GetAggregates();
242
242
  for (idx_t i = 0; i < aggregates.size(); i++) {
@@ -249,7 +249,7 @@ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChu
249
249
  }
250
250
 
251
251
  idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
252
- const vector<idx_t> &filter) {
252
+ const unsafe_vector<idx_t> &filter) {
253
253
  Vector hashes(LogicalType::HASH);
254
254
  groups.Hash(hashes);
255
255
 
@@ -257,7 +257,7 @@ idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChu
257
257
  }
258
258
 
259
259
  idx_t GroupedAggregateHashTable::AddChunk(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes,
260
- DataChunk &payload, const vector<idx_t> &filter) {
260
+ DataChunk &payload, const unsafe_vector<idx_t> &filter) {
261
261
  D_ASSERT(!is_finalized);
262
262
  if (groups.size() == 0) {
263
263
  return 0;
@@ -171,7 +171,7 @@ bool DistinctAggregateCollectionInfo::AnyDistinct() const {
171
171
  return !indices.empty();
172
172
  }
173
173
 
174
- const vector<idx_t> &DistinctAggregateCollectionInfo::Indices() const {
174
+ const unsafe_vector<idx_t> &DistinctAggregateCollectionInfo::Indices() const {
175
175
  return this->indices;
176
176
  }
177
177
 
@@ -12,7 +12,7 @@ const vector<vector<idx_t>> &GroupedAggregateData::GetGroupingFunctions() const
12
12
 
13
13
  void GroupedAggregateData::InitializeGroupby(vector<unique_ptr<Expression>> groups,
14
14
  vector<unique_ptr<Expression>> expressions,
15
- vector<vector<idx_t>> grouping_functions) {
15
+ vector<unsafe_vector<idx_t>> grouping_functions) {
16
16
  InitializeGroupbyGroups(std::move(groups));
17
17
  vector<LogicalType> payload_types_filters;
18
18
 
@@ -86,7 +86,7 @@ void GroupedAggregateData::InitializeGroupbyGroups(vector<unique_ptr<Expression>
86
86
  this->groups = std::move(groups);
87
87
  }
88
88
 
89
- void GroupedAggregateData::SetGroupingFunctions(vector<vector<idx_t>> &functions) {
89
+ void GroupedAggregateData::SetGroupingFunctions(vector<unsafe_vector<idx_t>> &functions) {
90
90
  grouping_functions.reserve(functions.size());
91
91
  for (idx_t i = 0; i < functions.size(); i++) {
92
92
  grouping_functions.push_back(std::move(functions[i]));
@@ -117,7 +117,8 @@ PhysicalHashAggregate::PhysicalHashAggregate(ClientContext &context, vector<Logi
117
117
  vector<unique_ptr<Expression>> expressions,
118
118
  vector<unique_ptr<Expression>> groups_p,
119
119
  vector<GroupingSet> grouping_sets_p,
120
- vector<vector<idx_t>> grouping_functions_p, idx_t estimated_cardinality)
120
+ vector<unsafe_vector<idx_t>> grouping_functions_p,
121
+ idx_t estimated_cardinality)
121
122
  : PhysicalOperator(PhysicalOperatorType::HASH_GROUP_BY, std::move(types), estimated_cardinality),
122
123
  grouping_sets(std::move(grouping_sets_p)) {
123
124
  // get a list of all aggregates to be computed
@@ -266,7 +267,7 @@ void PhysicalHashAggregate::SinkDistinctGrouping(ExecutionContext &context, Data
266
267
  DataChunk empty_chunk;
267
268
 
268
269
  // Create an empty filter for Sink, since we don't need to update any aggregate states here
269
- vector<idx_t> empty_filter;
270
+ unsafe_vector<idx_t> empty_filter;
270
271
 
271
272
  for (idx_t &idx : distinct_info.indices) {
272
273
  auto &aggregate = grouped_aggregate_data.aggregates[idx]->Cast<BoundAggregateExpression>();
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/execution/operator/helper/physical_streaming_sample.hpp"
2
2
  #include "duckdb/common/random_engine.hpp"
3
3
  #include "duckdb/common/to_string.hpp"
4
+ #include "duckdb/common/enum_util.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
@@ -68,7 +69,7 @@ OperatorResultType PhysicalStreamingSample::Execute(ExecutionContext &context, D
68
69
  }
69
70
 
70
71
  string PhysicalStreamingSample::ParamsToString() const {
71
- return SampleMethodToString(method) + ": " + to_string(100 * percentage) + "%";
72
+ return EnumUtil::ToString(method) + ": " + to_string(100 * percentage) + "%";
72
73
  }
73
74
 
74
75
  } // namespace duckdb
@@ -6,6 +6,7 @@
6
6
  #include "duckdb/execution/operator/join/outer_join_marker.hpp"
7
7
  #include "duckdb/execution/operator/join/physical_comparison_join.hpp"
8
8
  #include "duckdb/execution/operator/join/physical_cross_product.hpp"
9
+ #include "duckdb/common/enum_util.hpp"
9
10
 
10
11
  namespace duckdb {
11
12
 
@@ -200,7 +201,7 @@ OperatorResultType PhysicalBlockwiseNLJoin::ExecuteInternal(ExecutionContext &co
200
201
  }
201
202
 
202
203
  string PhysicalBlockwiseNLJoin::ParamsToString() const {
203
- string extra_info = JoinTypeToString(join_type) + "\n";
204
+ string extra_info = EnumUtil::ToString(join_type) + "\n";
204
205
  extra_info += condition->GetName();
205
206
  return extra_info;
206
207
  }
@@ -1,5 +1,6 @@
1
1
  #include "duckdb/execution/operator/join/physical_comparison_join.hpp"
2
2
  #include "duckdb/common/types/chunk_collection.hpp"
3
+ #include "duckdb/common/enum_util.hpp"
3
4
 
4
5
  namespace duckdb {
5
6
 
@@ -24,7 +25,7 @@ PhysicalComparisonJoin::PhysicalComparisonJoin(LogicalOperator &op, PhysicalOper
24
25
  }
25
26
 
26
27
  string PhysicalComparisonJoin::ParamsToString() const {
27
- string extra_info = JoinTypeToString(join_type) + "\n";
28
+ string extra_info = EnumUtil::ToString(join_type) + "\n";
28
29
  for (auto &it : conditions) {
29
30
  string op = ExpressionTypeToOperator(it.comparison);
30
31
  extra_info += it.left->GetName() + " " + op + " " + it.right->GetName() + "\n";
@@ -0,0 +1,165 @@
1
+ #include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
2
+ #include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
3
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
4
+ #include "duckdb/common/types/batched_data_collection.hpp"
5
+ #include "duckdb/common/file_system.hpp"
6
+ #include "duckdb/common/file_opener.hpp"
7
+ #include "duckdb/common/allocator.hpp"
8
+ #include <algorithm>
9
+
10
+ namespace duckdb {
11
+
12
+ PhysicalBatchCopyToFile::PhysicalBatchCopyToFile(vector<LogicalType> types, CopyFunction function_p,
13
+ unique_ptr<FunctionData> bind_data, idx_t estimated_cardinality)
14
+ : PhysicalOperator(PhysicalOperatorType::BATCH_COPY_TO_FILE, std::move(types), estimated_cardinality),
15
+ function(std::move(function_p)), bind_data(std::move(bind_data)) {
16
+ if (!function.flush_batch || !function.prepare_batch) {
17
+ throw InternalException(
18
+ "PhysicalBatchCopyToFile created for copy function that does not have prepare_batch/flush_batch defined");
19
+ }
20
+ }
21
+
22
+ //===--------------------------------------------------------------------===//
23
+ // Sink
24
+ //===--------------------------------------------------------------------===//
25
+ class BatchCopyToGlobalState : public GlobalSinkState {
26
+ public:
27
+ explicit BatchCopyToGlobalState(unique_ptr<GlobalFunctionData> global_state)
28
+ : rows_copied(0), global_state(std::move(global_state)) {
29
+ }
30
+
31
+ mutex lock;
32
+ mutex flush_lock;
33
+ atomic<idx_t> rows_copied;
34
+ unique_ptr<GlobalFunctionData> global_state;
35
+ map<idx_t, unique_ptr<PreparedBatchData>> batch_data;
36
+ };
37
+
38
+ class BatchCopyToLocalState : public LocalSinkState {
39
+ public:
40
+ explicit BatchCopyToLocalState(unique_ptr<LocalFunctionData> local_state_p)
41
+ : local_state(std::move(local_state_p)), rows_copied(0), batch_index(0) {
42
+ }
43
+
44
+ unique_ptr<LocalFunctionData> local_state;
45
+ unique_ptr<ColumnDataCollection> collection;
46
+ ColumnDataAppendState append_state;
47
+ idx_t rows_copied;
48
+ idx_t batch_index;
49
+
50
+ void InitializeCollection(ClientContext &context, const PhysicalOperator &op) {
51
+ collection = make_uniq<ColumnDataCollection>(Allocator::Get(context), op.children[0]->types);
52
+ collection->InitializeAppend(append_state);
53
+ }
54
+ };
55
+
56
+ SinkResultType PhysicalBatchCopyToFile::Sink(ExecutionContext &context, DataChunk &chunk,
57
+ OperatorSinkInput &input) const {
58
+ auto &state = input.local_state.Cast<BatchCopyToLocalState>();
59
+ if (!state.collection) {
60
+ state.InitializeCollection(context.client, *this);
61
+ }
62
+ state.rows_copied += chunk.size();
63
+ state.collection->Append(state.append_state, chunk);
64
+ return SinkResultType::NEED_MORE_INPUT;
65
+ }
66
+
67
+ void PhysicalBatchCopyToFile::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
68
+ LocalSinkState &lstate) const {
69
+ auto &state = lstate.Cast<BatchCopyToLocalState>();
70
+ auto &gstate = gstate_p.Cast<BatchCopyToGlobalState>();
71
+ gstate.rows_copied += state.rows_copied;
72
+ }
73
+
74
+ SinkFinalizeType PhysicalBatchCopyToFile::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
75
+ GlobalSinkState &gstate_p) const {
76
+ auto &gstate = gstate_p.Cast<BatchCopyToGlobalState>();
77
+ FlushBatchData(context, gstate_p, NumericLimits<int64_t>::Maximum());
78
+ if (function.copy_to_finalize) {
79
+ function.copy_to_finalize(context, *bind_data, *gstate.global_state);
80
+
81
+ if (use_tmp_file) {
82
+ PhysicalCopyToFile::MoveTmpFile(context, file_path);
83
+ }
84
+ }
85
+ return SinkFinalizeType::READY;
86
+ }
87
+
88
+ void PhysicalBatchCopyToFile::PrepareBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t batch_index,
89
+ unique_ptr<ColumnDataCollection> collection) const {
90
+ auto &gstate = gstate_p.Cast<BatchCopyToGlobalState>();
91
+
92
+ // prepare the batch
93
+ auto batch_data = function.prepare_batch(context, *bind_data, *gstate.global_state, std::move(collection));
94
+ // move the batch data to the set of prepared batch data
95
+ lock_guard<mutex> l(gstate.lock);
96
+ gstate.batch_data[batch_index] = std::move(batch_data);
97
+ }
98
+
99
+ void PhysicalBatchCopyToFile::FlushBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t min_index) const {
100
+ auto &gstate = gstate_p.Cast<BatchCopyToGlobalState>();
101
+
102
+ // flush batch data to disk (if there are any to flush)
103
+ while (true) {
104
+ // grab the flush lock - we can only call flush_batch with this lock
105
+ // otherwise the data might end up in the wrong order
106
+ lock_guard<mutex> l(gstate.flush_lock);
107
+ unique_ptr<PreparedBatchData> batch_data;
108
+ {
109
+ // fetch the next batch to flush (if any)
110
+ lock_guard<mutex> l(gstate.lock);
111
+ if (gstate.batch_data.empty()) {
112
+ // no batch data left to flush
113
+ break;
114
+ }
115
+ auto entry = gstate.batch_data.begin();
116
+ if (entry->first >= min_index) {
117
+ // this data is past the min_index - we cannot write it yet
118
+ break;
119
+ }
120
+ if (!entry->second) {
121
+ // this batch is in process of being prepared but is not ready yet
122
+ break;
123
+ }
124
+ batch_data = std::move(entry->second);
125
+ gstate.batch_data.erase(entry);
126
+ }
127
+ function.flush_batch(context, *bind_data, *gstate.global_state, *batch_data);
128
+ }
129
+ }
130
+
131
+ void PhysicalBatchCopyToFile::NextBatch(ExecutionContext &context, GlobalSinkState &gstate_p,
132
+ LocalSinkState &lstate) const {
133
+ auto &state = lstate.Cast<BatchCopyToLocalState>();
134
+ if (state.collection) {
135
+ // we finished processing this batch
136
+ // start flushing data
137
+ PrepareBatchData(context.client, gstate_p, state.batch_index, std::move(state.collection));
138
+ FlushBatchData(context.client, gstate_p, lstate.partition_info.min_batch_index.GetIndex());
139
+ }
140
+ state.batch_index = lstate.partition_info.batch_index.GetIndex();
141
+
142
+ state.InitializeCollection(context.client, *this);
143
+ }
144
+
145
+ unique_ptr<LocalSinkState> PhysicalBatchCopyToFile::GetLocalSinkState(ExecutionContext &context) const {
146
+ return make_uniq<BatchCopyToLocalState>(function.copy_to_initialize_local(context, *bind_data));
147
+ }
148
+
149
+ unique_ptr<GlobalSinkState> PhysicalBatchCopyToFile::GetGlobalSinkState(ClientContext &context) const {
150
+ return make_uniq<BatchCopyToGlobalState>(function.copy_to_initialize_global(context, *bind_data, file_path));
151
+ }
152
+
153
+ //===--------------------------------------------------------------------===//
154
+ // Source
155
+ //===--------------------------------------------------------------------===//
156
+ SourceResultType PhysicalBatchCopyToFile::GetData(ExecutionContext &context, DataChunk &chunk,
157
+ OperatorSourceInput &input) const {
158
+ auto &g = sink_state->Cast<BatchCopyToGlobalState>();
159
+
160
+ chunk.SetCardinality(1);
161
+ chunk.SetValue(0, 0, Value::BIGINT(g.rows_copied));
162
+ return SourceResultType::FINISHED;
163
+ }
164
+
165
+ } // namespace duckdb
@@ -43,7 +43,7 @@ public:
43
43
  // Sink
44
44
  //===--------------------------------------------------------------------===//
45
45
 
46
- void MoveTmpFile(ClientContext &context, const string &tmp_file_path) {
46
+ void PhysicalCopyToFile::MoveTmpFile(ClientContext &context, const string &tmp_file_path) {
47
47
  auto &fs = FileSystem::GetFileSystem(context);
48
48
  auto file_path = tmp_file_path.substr(0, tmp_file_path.length() - 4);
49
49
  if (fs.FileExists(file_path)) {
@@ -1,11 +1,13 @@
1
1
  #include "duckdb/execution/operator/schema/physical_attach.hpp"
2
- #include "duckdb/parser/parsed_data/attach_info.hpp"
2
+
3
3
  #include "duckdb/catalog/catalog.hpp"
4
- #include "duckdb/main/database_manager.hpp"
5
4
  #include "duckdb/main/attached_database.hpp"
6
5
  #include "duckdb/main/database.hpp"
7
- #include "duckdb/storage/storage_extension.hpp"
6
+ #include "duckdb/main/database_manager.hpp"
7
+ #include "duckdb/main/database_path_and_type.hpp"
8
8
  #include "duckdb/main/extension_helper.hpp"
9
+ #include "duckdb/parser/parsed_data/attach_info.hpp"
10
+ #include "duckdb/storage/storage_extension.hpp"
9
11
 
10
12
  namespace duckdb {
11
13
 
@@ -44,11 +46,11 @@ SourceResultType PhysicalAttach::GetData(ExecutionContext &context, DataChunk &c
44
46
  auto &db = DatabaseInstance::GetDatabase(context.client);
45
47
  if (type.empty()) {
46
48
  // try to extract type from path
47
- type = db.ExtractDatabaseType(info->path);
48
- }
49
- if (!type.empty()) {
50
- type = ExtensionHelper::ApplyExtensionAlias(type);
49
+ auto path_and_type = DBPathAndType::Parse(info->path, config);
50
+ type = path_and_type.type;
51
+ info->path = path_and_type.path;
51
52
  }
53
+
52
54
  if (type.empty() && !unrecognized_option.empty()) {
53
55
  throw BinderException("Unrecognized option for attach \"%s\"", unrecognized_option);
54
56
  }
@@ -48,7 +48,7 @@ HtEntryType PartitionableHashTable::GetHTEntrySize() {
48
48
  }
49
49
 
50
50
  idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes,
51
- DataChunk &payload, const vector<idx_t> &filter) {
51
+ DataChunk &payload, const unsafe_vector<idx_t> &filter) {
52
52
  // If this is false, a single AddChunk would overflow the max capacity
53
53
  D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
54
54
  if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity()) {
@@ -65,7 +65,7 @@ idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &group
65
65
  }
66
66
 
67
67
  idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition,
68
- const vector<idx_t> &filter) {
68
+ const unsafe_vector<idx_t> &filter) {
69
69
  groups.Hash(hashes);
70
70
 
71
71
  // we partition when we are asked to or when the unpartitioned ht runs out of space
@@ -1,17 +1,40 @@
1
1
  #include "duckdb/execution/physical_plan_generator.hpp"
2
2
  #include "duckdb/execution/operator/persistent/physical_copy_to_file.hpp"
3
+ #include "duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp"
3
4
  #include "duckdb/planner/operator/logical_copy_to_file.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
7
8
  unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile &op) {
8
9
  auto plan = CreatePlan(*op.children[0]);
10
+ bool preserve_insertion_order = PhysicalPlanGenerator::PreserveInsertionOrder(context, *plan);
11
+ bool supports_batch_index = PhysicalPlanGenerator::UseBatchIndex(context, *plan);
9
12
  auto &fs = FileSystem::GetFileSystem(context);
10
13
  op.file_path = fs.ExpandPath(op.file_path, FileSystem::GetFileOpener(context));
11
-
12
14
  if (op.use_tmp_file) {
13
15
  op.file_path += ".tmp";
14
16
  }
17
+ if (op.per_thread_output || op.partition_output || !op.partition_columns.empty() || op.overwrite_or_ignore) {
18
+ // hive-partitioning/per-thread output does not care about insertion order, and does not support batch indexes
19
+ preserve_insertion_order = false;
20
+ supports_batch_index = false;
21
+ }
22
+ auto mode = CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE;
23
+ if (op.function.execution_mode) {
24
+ mode = op.function.execution_mode(preserve_insertion_order, supports_batch_index);
25
+ }
26
+ if (mode == CopyFunctionExecutionMode::BATCH_COPY_TO_FILE) {
27
+ if (!supports_batch_index) {
28
+ throw InternalException("BATCH_COPY_TO_FILE can only be used if batch indexes are supported");
29
+ }
30
+ // batched copy to file
31
+ auto copy = make_uniq<PhysicalBatchCopyToFile>(op.types, op.function, std::move(op.bind_data),
32
+ op.estimated_cardinality);
33
+ copy->file_path = op.file_path;
34
+ copy->use_tmp_file = op.use_tmp_file;
35
+ copy->children.push_back(std::move(plan));
36
+ return std::move(copy);
37
+ }
15
38
  // COPY from select statement to file
16
39
  auto copy = make_uniq<PhysicalCopyToFile>(op.types, op.function, std::move(op.bind_data), op.estimated_cardinality);
17
40
  copy->file_path = op.file_path;
@@ -23,9 +46,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile
23
46
  copy->partition_columns = op.partition_columns;
24
47
  copy->names = op.names;
25
48
  copy->expected_types = op.expected_types;
26
- if (op.function.parallel) {
27
- copy->parallel = op.function.parallel(context, *copy->bind_data);
28
- }
49
+ copy->parallel = mode == CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE;
29
50
 
30
51
  copy->children.push_back(std::move(plan));
31
52
  return std::move(copy);
@@ -2,6 +2,7 @@
2
2
  #include "duckdb/execution/operator/helper/physical_streaming_sample.hpp"
3
3
  #include "duckdb/execution/physical_plan_generator.hpp"
4
4
  #include "duckdb/planner/operator/logical_sample.hpp"
5
+ #include "duckdb/common/enum_util.hpp"
5
6
 
6
7
  namespace duckdb {
7
8
 
@@ -20,7 +21,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSample &op
20
21
  if (!op.sample_options->is_percentage) {
21
22
  throw ParserException("Sample method %s cannot be used with a discrete sample count, either switch to "
22
23
  "reservoir sampling or use a sample_size",
23
- SampleMethodToString(op.sample_options->method));
24
+ EnumUtil::ToString(op.sample_options->method));
24
25
  }
25
26
  sample = make_uniq<PhysicalStreamingSample>(op.types, op.sample_options->method,
26
27
  op.sample_options->sample_size.GetValue<double>(),
@@ -131,7 +131,7 @@ void RadixPartitionedHashTable::PopulateGroupChunk(DataChunk &group_chunk, DataC
131
131
  }
132
132
 
133
133
  void RadixPartitionedHashTable::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input,
134
- DataChunk &payload_input, const vector<idx_t> &filter) const {
134
+ DataChunk &payload_input, const unsafe_vector<idx_t> &filter) const {
135
135
  auto &llstate = input.local_state.Cast<RadixHTLocalState>();
136
136
  auto &gstate = input.global_state.Cast<RadixHTGlobalState>();
137
137
  D_ASSERT(!gstate.is_finalized);
@@ -10,6 +10,7 @@
10
10
  #include "duckdb/common/types/time.hpp"
11
11
  #include "duckdb/common/types/timestamp.hpp"
12
12
  #include "duckdb/common/vector_operations/vector_operations.hpp"
13
+ #include "duckdb/common/enum_util.hpp"
13
14
  #include "duckdb/function/scalar/operators.hpp"
14
15
  #include "duckdb/planner/expression/bound_function_expression.hpp"
15
16
  #include "duckdb/function/scalar/nested_functions.hpp"
@@ -365,8 +366,8 @@ ScalarFunction AddFun::GetFunction(const LogicalType &left_type, const LogicalTy
365
366
  break;
366
367
  }
367
368
  // LCOV_EXCL_START
368
- throw NotImplementedException("AddFun for types %s, %s", LogicalTypeIdToString(left_type.id()),
369
- LogicalTypeIdToString(right_type.id()));
369
+ throw NotImplementedException("AddFun for types %s, %s", EnumUtil::ToString(left_type.id()),
370
+ EnumUtil::ToString(right_type.id()));
370
371
  // LCOV_EXCL_STOP
371
372
  }
372
373
 
@@ -617,8 +618,8 @@ ScalarFunction SubtractFun::GetFunction(const LogicalType &left_type, const Logi
617
618
  break;
618
619
  }
619
620
  // LCOV_EXCL_START
620
- throw NotImplementedException("SubtractFun for types %s, %s", LogicalTypeIdToString(left_type.id()),
621
- LogicalTypeIdToString(right_type.id()));
621
+ throw NotImplementedException("SubtractFun for types %s, %s", EnumUtil::ToString(left_type.id()),
622
+ EnumUtil::ToString(right_type.id()));
622
623
  // LCOV_EXCL_STOP
623
624
  }
624
625