duckdb 0.7.2-dev654.0 → 0.7.2-dev717.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/include/json_functions.hpp +1 -0
  3. package/src/duckdb/extension/json/json_functions.cpp +11 -4
  4. package/src/duckdb/src/common/types/column_data_collection_segment.cpp +11 -6
  5. package/src/duckdb/src/common/vector_operations/vector_cast.cpp +2 -1
  6. package/src/duckdb/src/execution/aggregate_hashtable.cpp +10 -5
  7. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
  8. package/src/duckdb/src/execution/index/art/art.cpp +5 -5
  9. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -0
  10. package/src/duckdb/src/execution/partitionable_hashtable.cpp +14 -2
  11. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -1
  12. package/src/duckdb/src/function/cast/enum_casts.cpp +25 -3
  13. package/src/duckdb/src/function/cast/list_casts.cpp +17 -4
  14. package/src/duckdb/src/function/cast/map_cast.cpp +5 -2
  15. package/src/duckdb/src/function/cast/string_cast.cpp +36 -10
  16. package/src/duckdb/src/function/cast/struct_cast.cpp +23 -3
  17. package/src/duckdb/src/function/cast/union_casts.cpp +33 -7
  18. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  19. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +45 -0
  20. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -0
  21. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +2 -2
  22. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -0
  23. package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +84 -0
  24. package/src/duckdb/src/include/duckdb/function/cast/cast_function_set.hpp +2 -2
  25. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +28 -64
  26. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +2 -2
  27. package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -3
  28. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -2
  29. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +1 -1
  30. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +3 -1
  31. package/src/duckdb/src/storage/data_table.cpp +15 -13
  32. package/src/duckdb/src/storage/index.cpp +12 -1
  33. package/src/duckdb/src/storage/local_storage.cpp +20 -23
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev654.0",
5
+ "version": "0.7.2-dev717.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -54,6 +54,7 @@ public:
54
54
 
55
55
  struct JSONFunctionLocalState : public FunctionLocalState {
56
56
  public:
57
+ explicit JSONFunctionLocalState(Allocator &allocator);
57
58
  explicit JSONFunctionLocalState(ClientContext &context);
58
59
  static unique_ptr<FunctionLocalState> Init(ExpressionState &state, const BoundFunctionExpression &expr,
59
60
  FunctionData *bind_data);
@@ -103,7 +103,10 @@ unique_ptr<FunctionData> JSONReadManyFunctionData::Bind(ClientContext &context,
103
103
  return make_unique<JSONReadManyFunctionData>(std::move(paths), std::move(lens));
104
104
  }
105
105
 
106
- JSONFunctionLocalState::JSONFunctionLocalState(ClientContext &context) : json_allocator(BufferAllocator::Get(context)) {
106
+ JSONFunctionLocalState::JSONFunctionLocalState(Allocator &allocator) : json_allocator(allocator) {
107
+ }
108
+ JSONFunctionLocalState::JSONFunctionLocalState(ClientContext &context)
109
+ : JSONFunctionLocalState(BufferAllocator::Get(context)) {
107
110
  }
108
111
 
109
112
  unique_ptr<FunctionLocalState> JSONFunctionLocalState::Init(ExpressionState &state, const BoundFunctionExpression &expr,
@@ -184,8 +187,12 @@ unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context,
184
187
  return std::move(table_function);
185
188
  }
186
189
 
187
- static unique_ptr<FunctionLocalState> InitJSONCastLocalState(ClientContext &context) {
188
- return make_unique<JSONFunctionLocalState>(context);
190
+ static unique_ptr<FunctionLocalState> InitJSONCastLocalState(CastLocalStateParameters &parameters) {
191
+ if (parameters.context) {
192
+ return make_unique<JSONFunctionLocalState>(*parameters.context);
193
+ } else {
194
+ return make_unique<JSONFunctionLocalState>(Allocator::DefaultAllocator());
195
+ }
189
196
  }
190
197
 
191
198
  static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
@@ -208,9 +215,9 @@ static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastP
208
215
  mask.SetInvalid(idx);
209
216
  success = false;
210
217
  }
211
-
212
218
  return input;
213
219
  });
220
+ result.Reinterpret(source);
214
221
  return success;
215
222
  }
216
223
 
@@ -1,4 +1,5 @@
1
1
  #include "duckdb/common/types/column_data_collection_segment.hpp"
2
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
2
3
 
3
4
  namespace duckdb {
4
5
 
@@ -202,12 +203,16 @@ idx_t ColumnDataCollectionSegment::ReadVector(ChunkManagementState &state, Vecto
202
203
  throw InternalException("Column Data Collection: mismatch in struct child sizes");
203
204
  }
204
205
  }
205
- } else if (internal_type == PhysicalType::VARCHAR &&
206
- allocator->GetType() == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR) {
207
- for (auto &swizzle_segment : vdata.swizzle_data) {
208
- auto &string_heap_segment = GetVectorData(swizzle_segment.child_index);
209
- allocator->UnswizzlePointers(state, result, swizzle_segment.offset, swizzle_segment.count,
210
- string_heap_segment.block_id, string_heap_segment.offset);
206
+ } else if (internal_type == PhysicalType::VARCHAR) {
207
+ if (allocator->GetType() == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR) {
208
+ for (auto &swizzle_segment : vdata.swizzle_data) {
209
+ auto &string_heap_segment = GetVectorData(swizzle_segment.child_index);
210
+ allocator->UnswizzlePointers(state, result, swizzle_segment.offset, swizzle_segment.count,
211
+ string_heap_segment.block_id, string_heap_segment.offset);
212
+ }
213
+ }
214
+ if (state.properties == ColumnDataScanProperties::DISALLOW_ZERO_COPY) {
215
+ VectorOperations::Copy(result, result, vdata.count, 0, 0);
211
216
  }
212
217
  }
213
218
  return vcount;
@@ -11,7 +11,8 @@ bool VectorOperations::TryCast(CastFunctionSet &set, GetCastFunctionInput &input
11
11
  auto cast_function = set.GetCastFunction(source.GetType(), result.GetType(), input);
12
12
  unique_ptr<FunctionLocalState> local_state;
13
13
  if (cast_function.init_local_state) {
14
- local_state = cast_function.init_local_state(*input.context);
14
+ CastLocalStateParameters lparameters(input.context, cast_function.cast_data);
15
+ local_state = cast_function.init_local_state(lparameters);
15
16
  }
16
17
  CastParameters parameters(cast_function.cast_data.get(), strict, error_message, local_state.get());
17
18
  return cast_function.function(source, result, count, parameters);
@@ -155,25 +155,30 @@ void GroupedAggregateHashTable::VerifyInternal() {
155
155
  D_ASSERT(count == entries);
156
156
  }
157
157
 
158
- idx_t GroupedAggregateHashTable::MaxCapacity() {
159
- idx_t max_pages = 0;
160
- idx_t max_tuples = 0;
158
+ idx_t GroupedAggregateHashTable::GetMaxCapacity(HtEntryType entry_type, idx_t tuple_size) {
159
+ idx_t max_pages;
160
+ idx_t max_tuples;
161
161
 
162
162
  switch (entry_type) {
163
163
  case HtEntryType::HT_WIDTH_32:
164
164
  max_pages = NumericLimits<uint8_t>::Maximum();
165
165
  max_tuples = NumericLimits<uint16_t>::Maximum();
166
166
  break;
167
- default:
168
- D_ASSERT(entry_type == HtEntryType::HT_WIDTH_64);
167
+ case HtEntryType::HT_WIDTH_64:
169
168
  max_pages = NumericLimits<uint32_t>::Maximum();
170
169
  max_tuples = NumericLimits<uint16_t>::Maximum();
171
170
  break;
171
+ default:
172
+ throw InternalException("Unsupported hash table width");
172
173
  }
173
174
 
174
175
  return max_pages * MinValue(max_tuples, (idx_t)Storage::BLOCK_SIZE / tuple_size);
175
176
  }
176
177
 
178
+ idx_t GroupedAggregateHashTable::MaxCapacity() {
179
+ return GetMaxCapacity(entry_type, tuple_size);
180
+ }
181
+
177
182
  void GroupedAggregateHashTable::Verify() {
178
183
  #ifdef DEBUG
179
184
  switch (entry_type) {
@@ -11,7 +11,8 @@ unique_ptr<ExpressionState> ExpressionExecutor::InitializeState(const BoundCastE
11
11
  result->AddChild(expr.child.get());
12
12
  result->Finalize();
13
13
  if (expr.bound_cast.init_local_state) {
14
- result->local_state = expr.bound_cast.init_local_state(root.executor->GetContext());
14
+ CastLocalStateParameters parameters(root.executor->GetContext(), expr.bound_cast.cast_data);
15
+ result->local_state = expr.bound_cast.init_local_state(parameters);
15
16
  }
16
17
  return std::move(result);
17
18
  }
@@ -330,8 +330,7 @@ bool ART::ConstructFromSorted(idx_t count, vector<Key> &keys, Vector &row_identi
330
330
  //===--------------------------------------------------------------------===//
331
331
  // Insert / Verification / Constraint Checking
332
332
  //===--------------------------------------------------------------------===//
333
-
334
- bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
333
+ PreservedError ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
335
334
 
336
335
  D_ASSERT(row_ids.GetType().InternalType() == ROW_TYPE);
337
336
  D_ASSERT(logical_types[0] == input.data[0].GetType());
@@ -375,12 +374,13 @@ bool ART::Insert(IndexLock &lock, DataChunk &input, Vector &row_ids) {
375
374
 
376
375
  IncreaseAndVerifyMemorySize(old_memory_size);
377
376
  if (failed_index != DConstants::INVALID_INDEX) {
378
- return false;
377
+ return PreservedError(ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicate key \"%s\"",
378
+ AppendRowError(input, failed_index)));
379
379
  }
380
- return true;
380
+ return PreservedError();
381
381
  }
382
382
 
383
- bool ART::Append(IndexLock &lock, DataChunk &appended_data, Vector &row_identifiers) {
383
+ PreservedError ART::Append(IndexLock &lock, DataChunk &appended_data, Vector &row_identifiers) {
384
384
  DataChunk expression_result;
385
385
  expression_result.Initialize(Allocator::DefaultAllocator(), logical_types);
386
386
 
@@ -121,6 +121,9 @@ bool TryCastFloatingValueCommaSeparated(const string_t &value_str, const Logical
121
121
  }
122
122
 
123
123
  bool BaseCSVReader::TryCastValue(const Value &value, const LogicalType &sql_type) {
124
+ if (value.IsNull()) {
125
+ return true;
126
+ }
124
127
  if (options.has_format[LogicalTypeId::DATE] && sql_type.id() == LogicalTypeId::DATE) {
125
128
  date_t result;
126
129
  string error_message;
@@ -62,6 +62,18 @@ PartitionableHashTable::PartitionableHashTable(ClientContext &context, Allocator
62
62
  for (hash_t r = 0; r < partition_info.n_partitions; r++) {
63
63
  sel_vectors[r].Initialize();
64
64
  }
65
+
66
+ RowLayout layout;
67
+ layout.Initialize(group_types, AggregateObject::CreateAggregateObjects(bindings));
68
+ tuple_size = layout.GetRowWidth();
69
+ }
70
+
71
+ HtEntryType PartitionableHashTable::GetHTEntrySize() {
72
+ // we need at least STANDARD_VECTOR_SIZE entries to fit in the hash table
73
+ if (GroupedAggregateHashTable::GetMaxCapacity(HtEntryType::HT_WIDTH_32, tuple_size) < STANDARD_VECTOR_SIZE) {
74
+ return HtEntryType::HT_WIDTH_64;
75
+ }
76
+ return HtEntryType::HT_WIDTH_32;
65
77
  }
66
78
 
67
79
  idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes,
@@ -74,7 +86,7 @@ idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &group
74
86
  list.back()->Finalize();
75
87
  }
76
88
  list.push_back(make_unique<GroupedAggregateHashTable>(context, allocator, group_types, payload_types, bindings,
77
- HtEntryType::HT_WIDTH_32));
89
+ GetHTEntrySize()));
78
90
  }
79
91
  return list.back()->AddChunk(groups, group_hashes, payload, filter);
80
92
  }
@@ -141,7 +153,7 @@ void PartitionableHashTable::Partition() {
141
153
  for (auto &unpartitioned_ht : unpartitioned_hts) {
142
154
  for (idx_t r = 0; r < partition_info.n_partitions; r++) {
143
155
  radix_partitioned_hts[r].push_back(make_unique<GroupedAggregateHashTable>(
144
- context, allocator, group_types, payload_types, bindings, HtEntryType::HT_WIDTH_32));
156
+ context, allocator, group_types, payload_types, bindings, GetHTEntrySize()));
145
157
  partition_hts[r] = radix_partitioned_hts[r].back().get();
146
158
  }
147
159
  unpartitioned_ht->Partition(partition_hts, partition_info.radix_mask, partition_info.RADIX_SHIFT);
@@ -6,7 +6,7 @@
6
6
 
7
7
  namespace duckdb {
8
8
 
9
- BindCastInput::BindCastInput(CastFunctionSet &function_set, BindCastInfo *info, ClientContext *context)
9
+ BindCastInput::BindCastInput(CastFunctionSet &function_set, BindCastInfo *info, optional_ptr<ClientContext> context)
10
10
  : function_set(function_set), info(info), context(context) {
11
11
  }
12
12
 
@@ -109,17 +109,39 @@ unique_ptr<BoundCastData> BindEnumCast(BindCastInput &input, const LogicalType &
109
109
  return make_unique<EnumBoundCastData>(std::move(to_varchar_cast), std::move(from_varchar_cast));
110
110
  }
111
111
 
112
+ struct EnumCastLocalState : public FunctionLocalState {
113
+ public:
114
+ unique_ptr<FunctionLocalState> to_varchar_local;
115
+ unique_ptr<FunctionLocalState> from_varchar_local;
116
+ };
117
+
118
+ static unique_ptr<FunctionLocalState> InitEnumCastLocalState(CastLocalStateParameters &parameters) {
119
+ auto &cast_data = (EnumBoundCastData &)*parameters.cast_data;
120
+ auto result = make_unique<EnumCastLocalState>();
121
+
122
+ if (cast_data.from_varchar_cast.init_local_state) {
123
+ CastLocalStateParameters from_varchar_params(parameters, cast_data.from_varchar_cast.cast_data);
124
+ result->from_varchar_local = cast_data.from_varchar_cast.init_local_state(from_varchar_params);
125
+ }
126
+ if (cast_data.to_varchar_cast.init_local_state) {
127
+ CastLocalStateParameters from_varchar_params(parameters, cast_data.to_varchar_cast.cast_data);
128
+ result->from_varchar_local = cast_data.to_varchar_cast.init_local_state(from_varchar_params);
129
+ }
130
+ return std::move(result);
131
+ }
132
+
112
133
  static bool EnumToAnyCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
113
134
  auto &cast_data = (EnumBoundCastData &)*parameters.cast_data;
135
+ auto &lstate = (EnumCastLocalState &)*parameters.local_state;
114
136
 
115
137
  Vector varchar_cast(LogicalType::VARCHAR, count);
116
138
 
117
139
  // cast to varchar
118
- CastParameters to_varchar_params(parameters, cast_data.to_varchar_cast.cast_data.get());
140
+ CastParameters to_varchar_params(parameters, cast_data.to_varchar_cast.cast_data, lstate.to_varchar_local);
119
141
  cast_data.to_varchar_cast.function(source, varchar_cast, count, to_varchar_params);
120
142
 
121
143
  // cast from varchar to the target
122
- CastParameters from_varchar_params(parameters, cast_data.from_varchar_cast.cast_data.get());
144
+ CastParameters from_varchar_params(parameters, cast_data.from_varchar_cast.cast_data, lstate.from_varchar_local);
123
145
  cast_data.from_varchar_cast.function(varchar_cast, result, count, from_varchar_params);
124
146
  return true;
125
147
  }
@@ -152,7 +174,7 @@ BoundCastInfo DefaultCasts::EnumCastSwitch(BindCastInput &input, const LogicalTy
152
174
  throw InternalException("ENUM can only have unsigned integers (except UINT64) as physical types");
153
175
  }
154
176
  default: {
155
- return BoundCastInfo(EnumToAnyCast, BindEnumCast(input, source, target));
177
+ return BoundCastInfo(EnumToAnyCast, BindEnumCast(input, source, target), InitEnumCastLocalState);
156
178
  }
157
179
  }
158
180
  }
@@ -1,5 +1,6 @@
1
1
  #include "duckdb/function/cast/default_casts.hpp"
2
2
  #include "duckdb/function/cast/cast_function_set.hpp"
3
+ #include "duckdb/function/cast/bound_cast_data.hpp"
3
4
 
4
5
  namespace duckdb {
5
6
 
@@ -12,6 +13,15 @@ unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &i
12
13
  return make_unique<ListBoundCastData>(std::move(child_cast));
13
14
  }
14
15
 
16
+ unique_ptr<FunctionLocalState> ListBoundCastData::InitListLocalState(CastLocalStateParameters &parameters) {
17
+ auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
18
+ if (!cast_data.child_cast_info.init_local_state) {
19
+ return nullptr;
20
+ }
21
+ CastLocalStateParameters child_parameters(parameters, cast_data.child_cast_info.cast_data);
22
+ return cast_data.child_cast_info.init_local_state(child_parameters);
23
+ }
24
+
15
25
  bool ListCast::ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
16
26
  auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
17
27
 
@@ -40,7 +50,7 @@ bool ListCast::ListToListCast(Vector &source, Vector &result, idx_t count, CastP
40
50
  ListVector::Reserve(result, source_size);
41
51
  auto &append_vector = ListVector::GetEntry(result);
42
52
 
43
- CastParameters child_parameters(parameters, cast_data.child_cast_info.cast_data.get());
53
+ CastParameters child_parameters(parameters, cast_data.child_cast_info.cast_data, parameters.local_state);
44
54
  if (!cast_data.child_cast_info.function(source_cc, append_vector, source_size, child_parameters)) {
45
55
  return false;
46
56
  }
@@ -116,10 +126,13 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
116
126
  BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
117
127
  switch (target.id()) {
118
128
  case LogicalTypeId::LIST:
119
- return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
129
+ return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target),
130
+ ListBoundCastData::InitListLocalState);
120
131
  case LogicalTypeId::VARCHAR:
121
- return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
122
- input, source, LogicalType::LIST(LogicalType::VARCHAR)));
132
+ return BoundCastInfo(
133
+ ListToVarcharCast,
134
+ ListBoundCastData::BindListToListCast(input, source, LogicalType::LIST(LogicalType::VARCHAR)),
135
+ ListBoundCastData::InitListLocalState);
123
136
  default:
124
137
  return DefaultCasts::TryVectorNullCast;
125
138
  }
@@ -1,5 +1,6 @@
1
1
  #include "duckdb/function/cast/default_casts.hpp"
2
2
  #include "duckdb/function/cast/cast_function_set.hpp"
3
+ #include "duckdb/function/cast/bound_cast_data.hpp"
3
4
 
4
5
  namespace duckdb {
5
6
 
@@ -78,10 +79,12 @@ static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastPa
78
79
  BoundCastInfo DefaultCasts::MapCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
79
80
  switch (target.id()) {
80
81
  case LogicalTypeId::MAP:
81
- return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
82
+ return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target),
83
+ ListBoundCastData::InitListLocalState);
82
84
  case LogicalTypeId::VARCHAR: {
83
85
  auto varchar_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
84
- return BoundCastInfo(MapToVarcharCast, ListBoundCastData::BindListToListCast(input, source, varchar_type));
86
+ return BoundCastInfo(MapToVarcharCast, ListBoundCastData::BindListToListCast(input, source, varchar_type),
87
+ ListBoundCastData::InitListLocalState);
85
88
  }
86
89
  default:
87
90
  return TryVectorNullCast;
@@ -3,6 +3,7 @@
3
3
  #include "duckdb/common/pair.hpp"
4
4
  #include "duckdb/common/vector.hpp"
5
5
  #include "duckdb/function/scalar/nested_functions.hpp"
6
+ #include "duckdb/function/cast/bound_cast_data.hpp"
6
7
 
7
8
  namespace duckdb {
8
9
 
@@ -115,7 +116,9 @@ static BoundCastInfo VectorStringCastNumericSwitch(BindCastInput &input, const L
115
116
  }
116
117
  }
117
118
 
119
+ //===--------------------------------------------------------------------===//
118
120
  // string -> list casting
121
+ //===--------------------------------------------------------------------===//
119
122
  bool VectorStringToList::StringToNestedTypeCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result,
120
123
  ValidityMask &result_mask, idx_t count, CastParameters &parameters,
121
124
  const SelectionVector *sel) {
@@ -163,7 +166,7 @@ bool VectorStringToList::StringToNestedTypeCastLoop(string_t *source_data, Valid
163
166
 
164
167
  auto &result_child = ListVector::GetEntry(result);
165
168
  auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
166
- CastParameters child_parameters(parameters, cast_data.child_cast_info.cast_data.get());
169
+ CastParameters child_parameters(parameters, cast_data.child_cast_info.cast_data, parameters.local_state);
167
170
  return cast_data.child_cast_info.function(varchar_vector, result_child, total_list_size, child_parameters) &&
168
171
  all_converted;
169
172
  }
@@ -177,11 +180,12 @@ static LogicalType InitVarcharStructType(const LogicalType &target) {
177
180
  return LogicalType::STRUCT(child_types);
178
181
  }
179
182
 
183
+ //===--------------------------------------------------------------------===//
180
184
  // string -> struct casting
185
+ //===--------------------------------------------------------------------===//
181
186
  bool VectorStringToStruct::StringToNestedTypeCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result,
182
187
  ValidityMask &result_mask, idx_t count,
183
188
  CastParameters &parameters, const SelectionVector *sel) {
184
-
185
189
  auto varchar_struct_type = InitVarcharStructType(result.GetType());
186
190
  Vector varchar_vector(varchar_struct_type, count);
187
191
  auto &child_vectors = StructVector::GetEntries(varchar_vector);
@@ -216,21 +220,39 @@ bool VectorStringToStruct::StringToNestedTypeCastLoop(string_t *source_data, Val
216
220
  }
217
221
 
218
222
  auto &cast_data = (StructBoundCastData &)*parameters.cast_data;
223
+ auto &lstate = (StructCastLocalState &)*parameters.local_state;
219
224
  D_ASSERT(cast_data.child_cast_info.size() == result_children.size());
220
225
 
221
226
  for (idx_t child_idx = 0; child_idx < result_children.size(); child_idx++) {
222
- auto &varchar_vector = *child_vectors[child_idx];
227
+ auto &child_varchar_vector = *child_vectors[child_idx];
223
228
  auto &result_child_vector = *result_children[child_idx];
224
229
  auto &child_cast_info = cast_data.child_cast_info[child_idx];
225
- CastParameters child_parameters(parameters, child_cast_info.cast_data.get());
226
- if (!child_cast_info.function(varchar_vector, result_child_vector, count, child_parameters)) {
230
+ CastParameters child_parameters(parameters, child_cast_info.cast_data, lstate.local_states[child_idx]);
231
+ if (!child_cast_info.function(child_varchar_vector, result_child_vector, count, child_parameters)) {
227
232
  all_converted = false;
228
233
  }
229
234
  }
230
235
  return all_converted;
231
236
  }
232
237
 
238
+ //===--------------------------------------------------------------------===//
233
239
  // string -> map casting
240
+ //===--------------------------------------------------------------------===//
241
+ unique_ptr<FunctionLocalState> InitMapCastLocalState(CastLocalStateParameters &parameters) {
242
+ auto &cast_data = (MapBoundCastData &)*parameters.cast_data;
243
+ auto result = make_unique<MapCastLocalState>();
244
+
245
+ if (cast_data.key_cast.init_local_state) {
246
+ CastLocalStateParameters child_params(parameters, cast_data.key_cast.cast_data);
247
+ result->key_state = cast_data.key_cast.init_local_state(child_params);
248
+ }
249
+ if (cast_data.value_cast.init_local_state) {
250
+ CastLocalStateParameters child_params(parameters, cast_data.value_cast.cast_data);
251
+ result->value_state = cast_data.value_cast.init_local_state(child_params);
252
+ }
253
+ return std::move(result);
254
+ }
255
+
234
256
  bool VectorStringToMap::StringToNestedTypeCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result,
235
257
  ValidityMask &result_mask, idx_t count, CastParameters &parameters,
236
258
  const SelectionVector *sel) {
@@ -282,12 +304,13 @@ bool VectorStringToMap::StringToNestedTypeCastLoop(string_t *source_data, Validi
282
304
  auto &result_key_child = MapVector::GetKeys(result);
283
305
  auto &result_val_child = MapVector::GetValues(result);
284
306
  auto &cast_data = (MapBoundCastData &)*parameters.cast_data;
307
+ auto &lstate = (MapCastLocalState &)*parameters.local_state;
285
308
 
286
- CastParameters key_params(parameters, cast_data.key_cast.cast_data.get());
309
+ CastParameters key_params(parameters, cast_data.key_cast.cast_data, lstate.key_state);
287
310
  if (!cast_data.key_cast.function(varchar_key_vector, result_key_child, total_elements, key_params)) {
288
311
  all_converted = false;
289
312
  }
290
- CastParameters val_params(parameters, cast_data.value_cast.cast_data.get());
313
+ CastParameters val_params(parameters, cast_data.value_cast.cast_data, lstate.value_state);
291
314
  if (!cast_data.value_cast.function(varchar_val_vector, result_val_child, total_elements, val_params)) {
292
315
  all_converted = false;
293
316
  }
@@ -373,14 +396,17 @@ BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const Logical
373
396
  // the second argument allows for a secondary casting function to be passed in the CastParameters
374
397
  return BoundCastInfo(
375
398
  &StringToNestedTypeCast<VectorStringToList>,
376
- ListBoundCastData::BindListToListCast(input, LogicalType::LIST(LogicalType::VARCHAR), target));
399
+ ListBoundCastData::BindListToListCast(input, LogicalType::LIST(LogicalType::VARCHAR), target),
400
+ ListBoundCastData::InitListLocalState);
377
401
  case LogicalTypeId::STRUCT:
378
402
  return BoundCastInfo(&StringToNestedTypeCast<VectorStringToStruct>,
379
- StructBoundCastData::BindStructToStructCast(input, InitVarcharStructType(target), target));
403
+ StructBoundCastData::BindStructToStructCast(input, InitVarcharStructType(target), target),
404
+ StructBoundCastData::InitStructCastLocalState);
380
405
  case LogicalTypeId::MAP:
381
406
  return BoundCastInfo(&StringToNestedTypeCast<VectorStringToMap>,
382
407
  MapBoundCastData::BindMapToMapCast(
383
- input, LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR), target));
408
+ input, LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR), target),
409
+ InitMapCastLocalState);
384
410
  default:
385
411
  return VectorStringCastNumericSwitch(input, source, target);
386
412
  }
@@ -1,5 +1,6 @@
1
1
  #include "duckdb/function/cast/default_casts.hpp"
2
2
  #include "duckdb/function/cast/cast_function_set.hpp"
3
+ #include "duckdb/function/cast/bound_cast_data.hpp"
3
4
 
4
5
  namespace duckdb {
5
6
 
@@ -18,8 +19,24 @@ unique_ptr<BoundCastData> StructBoundCastData::BindStructToStructCast(BindCastIn
18
19
  return make_unique<StructBoundCastData>(std::move(child_cast_info), target);
19
20
  }
20
21
 
22
+ unique_ptr<FunctionLocalState> StructBoundCastData::InitStructCastLocalState(CastLocalStateParameters &parameters) {
23
+ auto &cast_data = (StructBoundCastData &)*parameters.cast_data;
24
+ auto result = make_unique<StructCastLocalState>();
25
+
26
+ for (auto &entry : cast_data.child_cast_info) {
27
+ unique_ptr<FunctionLocalState> child_state;
28
+ if (entry.init_local_state) {
29
+ CastLocalStateParameters child_params(parameters, entry.cast_data);
30
+ child_state = entry.init_local_state(child_params);
31
+ }
32
+ result->local_states.push_back(std::move(child_state));
33
+ }
34
+ return std::move(result);
35
+ }
36
+
21
37
  static bool StructToStructCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
22
38
  auto &cast_data = (StructBoundCastData &)*parameters.cast_data;
39
+ auto &lstate = (StructCastLocalState &)*parameters.local_state;
23
40
  auto &source_child_types = StructType::GetChildTypes(source.GetType());
24
41
  auto &source_children = StructVector::GetEntries(source);
25
42
  D_ASSERT(source_children.size() == StructType::GetChildTypes(result.GetType()).size());
@@ -29,7 +46,8 @@ static bool StructToStructCast(Vector &source, Vector &result, idx_t count, Cast
29
46
  for (idx_t c_idx = 0; c_idx < source_child_types.size(); c_idx++) {
30
47
  auto &result_child_vector = *result_children[c_idx];
31
48
  auto &source_child_vector = *source_children[c_idx];
32
- CastParameters child_parameters(parameters, cast_data.child_cast_info[c_idx].cast_data.get());
49
+ CastParameters child_parameters(parameters, cast_data.child_cast_info[c_idx].cast_data,
50
+ lstate.local_states[c_idx]);
33
51
  if (!cast_data.child_cast_info[c_idx].function(source_child_vector, result_child_vector, count,
34
52
  child_parameters)) {
35
53
  all_converted = false;
@@ -121,7 +139,8 @@ BoundCastInfo DefaultCasts::StructCastSwitch(BindCastInput &input, const Logical
121
139
  const LogicalType &target) {
122
140
  switch (target.id()) {
123
141
  case LogicalTypeId::STRUCT:
124
- return BoundCastInfo(StructToStructCast, StructBoundCastData::BindStructToStructCast(input, source, target));
142
+ return BoundCastInfo(StructToStructCast, StructBoundCastData::BindStructToStructCast(input, source, target),
143
+ StructBoundCastData::InitStructCastLocalState);
125
144
  case LogicalTypeId::VARCHAR: {
126
145
  // bind a cast in which we convert all child entries to VARCHAR entries
127
146
  auto &struct_children = StructType::GetChildTypes(source);
@@ -131,7 +150,8 @@ BoundCastInfo DefaultCasts::StructCastSwitch(BindCastInput &input, const Logical
131
150
  }
132
151
  auto varchar_type = LogicalType::STRUCT(varchar_children);
133
152
  return BoundCastInfo(StructToVarcharCast,
134
- StructBoundCastData::BindStructToStructCast(input, source, varchar_type));
153
+ StructBoundCastData::BindStructToStructCast(input, source, varchar_type),
154
+ StructBoundCastData::InitStructCastLocalState);
135
155
  }
136
156
  default:
137
157
  return TryVectorNullCast;
@@ -1,6 +1,6 @@
1
1
  #include "duckdb/function/cast/cast_function_set.hpp"
2
2
  #include "duckdb/function/cast/default_casts.hpp"
3
- #include "duckdb/function/cast/vector_cast_helpers.hpp"
3
+ #include "duckdb/function/cast/bound_cast_data.hpp"
4
4
 
5
5
  #include <algorithm> // for std::sort
6
6
 
@@ -98,12 +98,21 @@ unique_ptr<BoundCastData> BindToUnionCast(BindCastInput &input, const LogicalTyp
98
98
  return make_unique<ToUnionBoundCastData>(std::move(selected_cast));
99
99
  }
100
100
 
101
+ unique_ptr<FunctionLocalState> InitToUnionLocalState(CastLocalStateParameters &parameters) {
102
+ auto &cast_data = (ToUnionBoundCastData &)*parameters.cast_data;
103
+ if (!cast_data.member_cast_info.init_local_state) {
104
+ return nullptr;
105
+ }
106
+ CastLocalStateParameters child_parameters(parameters, cast_data.member_cast_info.cast_data);
107
+ return cast_data.member_cast_info.init_local_state(child_parameters);
108
+ }
109
+
101
110
  static bool ToUnionCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
102
111
  D_ASSERT(result.GetType().id() == LogicalTypeId::UNION);
103
112
  auto &cast_data = (ToUnionBoundCastData &)*parameters.cast_data;
104
113
  auto &selected_member_vector = UnionVector::GetMember(result, cast_data.tag);
105
114
 
106
- CastParameters child_parameters(parameters, cast_data.member_cast_info.cast_data.get());
115
+ CastParameters child_parameters(parameters, cast_data.member_cast_info.cast_data, parameters.local_state);
107
116
  if (!cast_data.member_cast_info.function(source, selected_member_vector, count, child_parameters)) {
108
117
  return false;
109
118
  }
@@ -118,7 +127,7 @@ static bool ToUnionCast(Vector &source, Vector &result, idx_t count, CastParamet
118
127
 
119
128
  BoundCastInfo DefaultCasts::ImplicitToUnionCast(BindCastInput &input, const LogicalType &source,
120
129
  const LogicalType &target) {
121
- return BoundCastInfo(&ToUnionCast, BindToUnionCast(input, source, target));
130
+ return BoundCastInfo(&ToUnionCast, BindToUnionCast(input, source, target), InitToUnionLocalState);
122
131
  }
123
132
 
124
133
  //--------------------------------------------------------------------------------------------------
@@ -197,8 +206,24 @@ unique_ptr<BoundCastData> BindUnionToUnionCast(BindCastInput &input, const Logic
197
206
  return make_unique<UnionToUnionBoundCastData>(tag_map, std::move(member_casts), target);
198
207
  }
199
208
 
209
+ unique_ptr<FunctionLocalState> InitUnionToUnionLocalState(CastLocalStateParameters &parameters) {
210
+ auto &cast_data = (UnionToUnionBoundCastData &)*parameters.cast_data;
211
+ auto result = make_unique<StructCastLocalState>();
212
+
213
+ for (auto &entry : cast_data.member_casts) {
214
+ unique_ptr<FunctionLocalState> child_state;
215
+ if (entry.init_local_state) {
216
+ CastLocalStateParameters child_params(parameters, entry.cast_data);
217
+ child_state = entry.init_local_state(child_params);
218
+ }
219
+ result->local_states.push_back(std::move(child_state));
220
+ }
221
+ return std::move(result);
222
+ }
223
+
200
224
  static bool UnionToUnionCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
201
225
  auto &cast_data = (UnionToUnionBoundCastData &)*parameters.cast_data;
226
+ auto &lstate = (StructCastLocalState &)*parameters.local_state;
202
227
 
203
228
  auto source_member_count = UnionType::GetMemberCount(source.GetType());
204
229
  auto target_member_count = UnionType::GetMemberCount(result.GetType());
@@ -213,7 +238,7 @@ static bool UnionToUnionCast(Vector &source, Vector &result, idx_t count, CastPa
213
238
  auto &target_member_vector = UnionVector::GetMember(result, target_member_idx);
214
239
  auto &member_cast = cast_data.member_casts[member_idx];
215
240
 
216
- CastParameters child_parameters(parameters, member_cast.cast_data.get());
241
+ CastParameters child_parameters(parameters, member_cast.cast_data, lstate.local_states[member_idx]);
217
242
  if (!member_cast.function(source_member_vector, target_member_vector, count, child_parameters)) {
218
243
  return false;
219
244
  }
@@ -339,10 +364,11 @@ BoundCastInfo DefaultCasts::UnionCastSwitch(BindCastInput &input, const LogicalT
339
364
  varchar_members.push_back(make_pair(UnionType::GetMemberName(source, member_idx), LogicalType::VARCHAR));
340
365
  }
341
366
  auto varchar_type = LogicalType::UNION(std::move(varchar_members));
342
- return BoundCastInfo(UnionToVarcharCast, BindUnionToUnionCast(input, source, varchar_type));
343
- } break;
367
+ return BoundCastInfo(UnionToVarcharCast, BindUnionToUnionCast(input, source, varchar_type),
368
+ InitUnionToUnionLocalState);
369
+ }
344
370
  case LogicalTypeId::UNION:
345
- return BoundCastInfo(UnionToUnionCast, BindUnionToUnionCast(input, source, target));
371
+ return BoundCastInfo(UnionToUnionCast, BindUnionToUnionCast(input, source, target), InitUnionToUnionLocalState);
346
372
  default:
347
373
  return TryVectorNullCast;
348
374
  }
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev654"
2
+ #define DUCKDB_VERSION "0.7.2-dev717"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "6525767cf1"
5
+ #define DUCKDB_SOURCE_ID "cd47ad8e2d"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -0,0 +1,45 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/optional_ptr.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/exception.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ template <class T>
16
+ class optional_ptr {
17
+ public:
18
+ optional_ptr() : ptr(nullptr) {
19
+ }
20
+ optional_ptr(T *ptr_p) : ptr(ptr_p) { // NOLINT: allow implicit creation from pointer
21
+ }
22
+ optional_ptr(const unique_ptr<T> &ptr_p) : ptr(ptr_p.get()) { // NOLINT: allow implicit creation from unique pointer
23
+ }
24
+
25
+ operator bool() const {
26
+ return ptr;
27
+ }
28
+ T &operator*() {
29
+ if (!ptr) {
30
+ throw InternalException("Attempting to dereference an optional pointer that is not set");
31
+ }
32
+ return *ptr;
33
+ }
34
+ T *operator->() {
35
+ if (!ptr) {
36
+ throw InternalException("Attempting to call a method on an optional pointer that is not set");
37
+ }
38
+ return ptr;
39
+ }
40
+
41
+ private:
42
+ T *ptr;
43
+ };
44
+
45
+ } // namespace duckdb
@@ -113,6 +113,7 @@ public:
113
113
  }
114
114
 
115
115
  idx_t MaxCapacity();
116
+ static idx_t GetMaxCapacity(HtEntryType entry_type, idx_t tuple_size);
116
117
 
117
118
  void Partition(vector<GroupedAggregateHashTable *> &partition_hts, hash_t mask, idx_t shift);
118
119
 
@@ -72,7 +72,7 @@ public:
72
72
  vector<row_t> &result_ids) override;
73
73
 
74
74
  //! Called when data is appended to the index. The lock obtained from InitializeLock must be held
75
- bool Append(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
75
+ PreservedError Append(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
76
76
  //! Verify that data can be appended to the index without a constraint violation
77
77
  void VerifyAppend(DataChunk &chunk) override;
78
78
  //! Verify that data can be appended to the index without a constraint violation using the conflict manager
@@ -80,7 +80,7 @@ public:
80
80
  //! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held
81
81
  void Delete(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
82
82
  //! Insert a chunk of entries into the index
83
- bool Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override;
83
+ PreservedError Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override;
84
84
 
85
85
  //! Construct an ART from a vector of sorted keys
86
86
  bool ConstructFromSorted(idx_t count, vector<Key> &keys, Vector &row_identifiers);
@@ -57,9 +57,12 @@ private:
57
57
 
58
58
  HashTableList unpartitioned_hts;
59
59
  unordered_map<hash_t, HashTableList> radix_partitioned_hts;
60
+ idx_t tuple_size;
60
61
 
61
62
  private:
62
63
  idx_t ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes, DataChunk &payload,
63
64
  const vector<idx_t> &filter);
65
+ //! Returns the HT entry size used for intermediate hash tables
66
+ HtEntryType GetHTEntrySize();
64
67
  };
65
68
  } // namespace duckdb
@@ -0,0 +1,84 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/function/cast/bound_cast_data.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/function/cast/default_casts.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ struct ListBoundCastData : public BoundCastData {
16
+ explicit ListBoundCastData(BoundCastInfo child_cast) : child_cast_info(std::move(child_cast)) {
17
+ }
18
+
19
+ BoundCastInfo child_cast_info;
20
+ static unique_ptr<BoundCastData> BindListToListCast(BindCastInput &input, const LogicalType &source,
21
+ const LogicalType &target);
22
+ static unique_ptr<FunctionLocalState> InitListLocalState(CastLocalStateParameters &parameters);
23
+
24
+ public:
25
+ unique_ptr<BoundCastData> Copy() const override {
26
+ return make_unique<ListBoundCastData>(child_cast_info.Copy());
27
+ }
28
+ };
29
+
30
//! Declares the static ListToListCast function; its parameter list (source, result, count, parameters) matches cast_function_t. Declaration only - the definition is not part of this header.
+ struct ListCast {
31
+ static bool ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
32
+ };
33
+
34
+ struct StructBoundCastData : public BoundCastData {
35
+ StructBoundCastData(vector<BoundCastInfo> child_casts, LogicalType target_p)
36
+ : child_cast_info(std::move(child_casts)), target(std::move(target_p)) {
37
+ }
38
+
39
+ vector<BoundCastInfo> child_cast_info;
40
+ LogicalType target;
41
+
42
+ static unique_ptr<BoundCastData> BindStructToStructCast(BindCastInput &input, const LogicalType &source,
43
+ const LogicalType &target);
44
+ static unique_ptr<FunctionLocalState> InitStructCastLocalState(CastLocalStateParameters &parameters);
45
+
46
+ public:
47
+ unique_ptr<BoundCastData> Copy() const override {
48
+ vector<BoundCastInfo> copy_info;
49
+ for (auto &info : child_cast_info) {
50
+ copy_info.push_back(info.Copy());
51
+ }
52
+ return make_unique<StructBoundCastData>(std::move(copy_info), target);
53
+ }
54
+ };
55
+
56
+ struct StructCastLocalState : public FunctionLocalState {
57
+ public:
58
+ vector<unique_ptr<FunctionLocalState>> local_states;
59
+ };
60
+
61
+ struct MapBoundCastData : public BoundCastData {
62
+ MapBoundCastData(BoundCastInfo key_cast, BoundCastInfo value_cast)
63
+ : key_cast(std::move(key_cast)), value_cast(std::move(value_cast)) {
64
+ }
65
+
66
+ BoundCastInfo key_cast;
67
+ BoundCastInfo value_cast;
68
+
69
+ static unique_ptr<BoundCastData> BindMapToMapCast(BindCastInput &input, const LogicalType &source,
70
+ const LogicalType &target);
71
+
72
+ public:
73
+ unique_ptr<BoundCastData> Copy() const override {
74
+ return make_unique<MapBoundCastData>(key_cast.Copy(), value_cast.Copy());
75
+ }
76
+ };
77
+
78
+ struct MapCastLocalState : public FunctionLocalState {
79
+ public:
80
+ unique_ptr<FunctionLocalState> key_state;
81
+ unique_ptr<FunctionLocalState> value_state;
82
+ };
83
+
84
+ } // namespace duckdb
@@ -19,12 +19,12 @@ typedef BoundCastInfo (*bind_cast_function_t)(BindCastInput &input, const Logica
19
19
  typedef int64_t (*implicit_cast_cost_t)(const LogicalType &from, const LogicalType &to);
20
20
 
21
21
  struct GetCastFunctionInput {
22
- GetCastFunctionInput(ClientContext *context = nullptr) : context(context) {
22
+ GetCastFunctionInput(optional_ptr<ClientContext> context = nullptr) : context(context) {
23
23
  }
24
24
  GetCastFunctionInput(ClientContext &context) : context(&context) {
25
25
  }
26
26
 
27
- ClientContext *context;
27
+ optional_ptr<ClientContext> context;
28
28
  };
29
29
 
30
30
  struct BindCastFunction {
@@ -10,6 +10,8 @@
10
10
 
11
11
  #include "duckdb/common/types.hpp"
12
12
  #include "duckdb/common/types/vector.hpp"
13
+ #include "duckdb/common/optional_ptr.hpp"
14
+ #include "duckdb/function/scalar_function.hpp"
13
15
 
14
16
  namespace duckdb {
15
17
 
@@ -31,25 +33,43 @@ struct BoundCastData {
31
33
  struct CastParameters {
32
34
  CastParameters() {
33
35
  }
34
- CastParameters(BoundCastData *cast_data, bool strict, string *error_message, FunctionLocalState *local_state)
36
+ CastParameters(BoundCastData *cast_data, bool strict, string *error_message,
37
+ optional_ptr<FunctionLocalState> local_state)
35
38
  : cast_data(cast_data), strict(strict), error_message(error_message), local_state(local_state) {
36
39
  }
37
- CastParameters(CastParameters &parent, BoundCastData *cast_data = nullptr)
38
- : cast_data(cast_data), strict(parent.strict), error_message(parent.error_message) {
40
+ CastParameters(CastParameters &parent, optional_ptr<BoundCastData> cast_data,
41
+ optional_ptr<FunctionLocalState> local_state)
42
+ : cast_data(cast_data), strict(parent.strict), error_message(parent.error_message), local_state(local_state) {
39
43
  }
40
44
 
41
45
  //! The bound cast data (if any)
42
- BoundCastData *cast_data = nullptr;
46
+ optional_ptr<BoundCastData> cast_data;
43
47
  //! whether or not to enable strict casting
44
48
  bool strict = false;
45
49
  // out: error message in case cast has failed
46
50
  string *error_message = nullptr;
47
51
  //! Local state
48
- FunctionLocalState *local_state = nullptr;
52
+ optional_ptr<FunctionLocalState> local_state;
53
+ };
54
+
55
+ struct CastLocalStateParameters {
56
+ CastLocalStateParameters(optional_ptr<ClientContext> context_p, optional_ptr<BoundCastData> cast_data_p)
57
+ : context(context_p), cast_data(cast_data_p) {
58
+ }
59
+ CastLocalStateParameters(ClientContext &context_p, optional_ptr<BoundCastData> cast_data_p)
60
+ : context(&context_p), cast_data(cast_data_p) {
61
+ }
62
+ CastLocalStateParameters(CastLocalStateParameters &parent, optional_ptr<BoundCastData> cast_data_p)
63
+ : context(parent.context), cast_data(cast_data_p) {
64
+ }
65
+
66
+ optional_ptr<ClientContext> context;
67
+ //! The bound cast data (if any)
68
+ optional_ptr<BoundCastData> cast_data;
49
69
  };
50
70
 
51
71
  typedef bool (*cast_function_t)(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
52
- typedef unique_ptr<FunctionLocalState> (*init_cast_local_state_t)(ClientContext &context);
72
+ typedef unique_ptr<FunctionLocalState> (*init_cast_local_state_t)(CastLocalStateParameters &parameters);
53
73
 
54
74
  struct BoundCastInfo {
55
75
  DUCKDB_API
@@ -65,72 +85,16 @@ public:
65
85
  };
66
86
 
67
87
  struct BindCastInput {
68
- DUCKDB_API BindCastInput(CastFunctionSet &function_set, BindCastInfo *info, ClientContext *context);
88
+ DUCKDB_API BindCastInput(CastFunctionSet &function_set, BindCastInfo *info, optional_ptr<ClientContext> context);
69
89
 
70
90
  CastFunctionSet &function_set;
71
91
  BindCastInfo *info;
72
- ClientContext *context;
92
+ optional_ptr<ClientContext> context;
73
93
 
74
94
  public:
75
95
  DUCKDB_API BoundCastInfo GetCastFunction(const LogicalType &source, const LogicalType &target);
76
96
  };
77
97
 
78
- struct ListBoundCastData : public BoundCastData {
79
- explicit ListBoundCastData(BoundCastInfo child_cast) : child_cast_info(std::move(child_cast)) {
80
- }
81
-
82
- BoundCastInfo child_cast_info;
83
- static unique_ptr<BoundCastData> BindListToListCast(BindCastInput &input, const LogicalType &source,
84
- const LogicalType &target);
85
-
86
- public:
87
- unique_ptr<BoundCastData> Copy() const override {
88
- return make_unique<ListBoundCastData>(child_cast_info.Copy());
89
- }
90
- };
91
-
92
- struct ListCast {
93
- static bool ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters);
94
- };
95
-
96
- struct StructBoundCastData : public BoundCastData {
97
- StructBoundCastData(vector<BoundCastInfo> child_casts, LogicalType target_p)
98
- : child_cast_info(std::move(child_casts)), target(std::move(target_p)) {
99
- }
100
-
101
- vector<BoundCastInfo> child_cast_info;
102
- LogicalType target;
103
-
104
- static unique_ptr<BoundCastData> BindStructToStructCast(BindCastInput &input, const LogicalType &source,
105
- const LogicalType &target);
106
-
107
- public:
108
- unique_ptr<BoundCastData> Copy() const override {
109
- vector<BoundCastInfo> copy_info;
110
- for (auto &info : child_cast_info) {
111
- copy_info.push_back(info.Copy());
112
- }
113
- return make_unique<StructBoundCastData>(std::move(copy_info), target);
114
- }
115
- };
116
-
117
- struct MapBoundCastData : public BoundCastData {
118
- MapBoundCastData(BoundCastInfo key_cast, BoundCastInfo value_cast)
119
- : key_cast(std::move(key_cast)), value_cast(std::move(value_cast)) {
120
- }
121
-
122
- BoundCastInfo key_cast;
123
- BoundCastInfo value_cast;
124
-
125
- static unique_ptr<BoundCastData> BindMapToMapCast(BindCastInput &input, const LogicalType &source,
126
- const LogicalType &target);
127
-
128
- public:
129
- unique_ptr<BoundCastData> Copy() const override {
130
- return make_unique<MapBoundCastData>(key_cast.Copy(), value_cast.Copy());
131
- }
132
- };
133
-
134
98
  struct DefaultCasts {
135
99
  DUCKDB_API static BoundCastInfo GetDefaultCastFunction(BindCastInput &input, const LogicalType &source,
136
100
  const LogicalType &target);
@@ -150,8 +150,8 @@ public:
150
150
 
151
151
  //! Append a chunk with the row ids [row_start, ..., row_start + chunk.size()] to all indexes of the table, returns
152
152
  //! whether or not the append succeeded
153
- bool AppendToIndexes(DataChunk &chunk, row_t row_start);
154
- static bool AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t row_start);
153
+ PreservedError AppendToIndexes(DataChunk &chunk, row_t row_start);
154
+ static PreservedError AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t row_start);
155
155
  //! Remove a chunk with the row ids [row_start, ..., row_start + chunk.size()] from all indexes of the table
156
156
  void RemoveFromIndexes(TableAppendState &state, DataChunk &chunk, row_t row_start);
157
157
  //! Remove the chunk with the specified set of row identifiers from all indexes of the table
@@ -80,9 +80,9 @@ public:
80
80
  //! Obtain a lock on the index
81
81
  virtual void InitializeLock(IndexLock &state);
82
82
  //! Called when data is appended to the index. The lock obtained from InitializeLock must be held
83
- virtual bool Append(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0;
83
+ virtual PreservedError Append(IndexLock &state, DataChunk &entries, Vector &row_identifiers) = 0;
84
84
  //! Obtains a lock and calls Append while holding that lock
85
- bool Append(DataChunk &entries, Vector &row_identifiers);
85
+ PreservedError Append(DataChunk &entries, Vector &row_identifiers);
86
86
  //! Verify that data can be appended to the index without a constraint violation
87
87
  virtual void VerifyAppend(DataChunk &chunk) = 0;
88
88
  //! Verify that data can be appended to the index without a constraint violation using the conflict manager
@@ -96,7 +96,7 @@ public:
96
96
  void Delete(DataChunk &entries, Vector &row_identifiers);
97
97
 
98
98
  //! Insert a chunk of entries into the index
99
- virtual bool Insert(IndexLock &lock, DataChunk &input, Vector &row_identifiers) = 0;
99
+ virtual PreservedError Insert(IndexLock &lock, DataChunk &input, Vector &row_identifiers) = 0;
100
100
 
101
101
  //! Merge another index into this index. The lock obtained from InitializeLock must be held, and the other
102
102
  //! index must also be locked during the merge
@@ -147,6 +147,7 @@ public:
147
147
 
148
148
  //! Execute the index expressions on an input chunk
149
149
  void ExecuteExpressions(DataChunk &input, DataChunk &result);
150
+ static string AppendRowError(DataChunk &input, idx_t index);
150
151
 
151
152
  protected:
152
153
  //! Lock used for any changes to the index
@@ -88,8 +88,8 @@ public:
88
88
 
89
89
  void AppendToIndexes(DuckTransaction &transaction, TableAppendState &append_state, idx_t append_count,
90
90
  bool append_to_table);
91
- bool AppendToIndexes(DuckTransaction &transaction, RowGroupCollection &source, TableIndexList &index_list,
92
- const vector<LogicalType> &table_types, row_t &start_row);
91
+ PreservedError AppendToIndexes(DuckTransaction &transaction, RowGroupCollection &source, TableIndexList &index_list,
92
+ const vector<LogicalType> &table_types, row_t &start_row);
93
93
 
94
94
  //! Creates an optimistic writer for this table
95
95
  OptimisticDataWriter *CreateOptimisticWriter();
@@ -134,7 +134,7 @@ unique_ptr<ParsedExpression> Transformer::TransformFuncCall(duckdb_libpgquery::P
134
134
  schema = INVALID_SCHEMA;
135
135
  function_name = reinterpret_cast<duckdb_libpgquery::PGValue *>(name->head->data.ptr_value)->val.str;
136
136
  } else {
137
- throw InternalException("TransformFuncCall - Expected 1, 2 or 3 qualifications");
137
+ throw ParserException("TransformFuncCall - Expected 1, 2 or 3 qualifications");
138
138
  }
139
139
 
140
140
  // transform children
@@ -158,7 +158,9 @@ BoundStatement Binder::BindCopyFrom(CopyStatement &stmt) {
158
158
  result.types = {LogicalType::BIGINT};
159
159
  result.names = {"Count"};
160
160
 
161
- D_ASSERT(!stmt.info->table.empty());
161
+ if (stmt.info->table.empty()) {
162
+ throw ParserException("COPY FROM requires a table name to be specified");
163
+ }
162
164
  // COPY FROM a file
163
165
  // generate an insert statement for the to-be-inserted table
164
166
  InsertStatement insert;
@@ -819,9 +819,10 @@ void DataTable::RevertAppend(idx_t start_row, idx_t count) {
819
819
  //===--------------------------------------------------------------------===//
820
820
  // Indexes
821
821
  //===--------------------------------------------------------------------===//
822
- bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t row_start) {
822
+ PreservedError DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t row_start) {
823
+ PreservedError error;
823
824
  if (indexes.Empty()) {
824
- return true;
825
+ return error;
825
826
  }
826
827
  // first generate the vector of row identifiers
827
828
  Vector row_identifiers(LogicalType::ROW_TYPE);
@@ -832,11 +833,13 @@ bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t
832
833
  // now append the entries to the indices
833
834
  indexes.Scan([&](Index &index) {
834
835
  try {
835
- if (!index.Append(chunk, row_identifiers)) {
836
- append_failed = true;
837
- return true;
838
- }
839
- } catch (...) {
836
+ error = index.Append(chunk, row_identifiers);
837
+ } catch (Exception &ex) {
838
+ error = PreservedError(ex);
839
+ } catch (std::exception &ex) {
840
+ error = PreservedError(ex);
841
+ }
842
+ if (error) {
840
843
  append_failed = true;
841
844
  return true;
842
845
  }
@@ -850,12 +853,11 @@ bool DataTable::AppendToIndexes(TableIndexList &indexes, DataChunk &chunk, row_t
850
853
  for (auto *index : already_appended) {
851
854
  index->Delete(chunk, row_identifiers);
852
855
  }
853
- return false;
854
856
  }
855
- return true;
857
+ return error;
856
858
  }
857
859
 
858
- bool DataTable::AppendToIndexes(DataChunk &chunk, row_t row_start) {
860
+ PreservedError DataTable::AppendToIndexes(DataChunk &chunk, row_t row_start) {
859
861
  D_ASSERT(is_root);
860
862
  return AppendToIndexes(info->indexes, chunk, row_start);
861
863
  }
@@ -1204,9 +1206,9 @@ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
1204
1206
  index->ExecuteExpressions(intermediate, result);
1205
1207
 
1206
1208
  // insert into the index
1207
- if (!index->Insert(lock, result, intermediate.data[intermediate.ColumnCount() - 1])) {
1208
- throw InternalException("Error during WAL replay. Can't create unique index, table contains "
1209
- "duplicate data on indexed column(s).");
1209
+ auto error = index->Insert(lock, result, intermediate.data[intermediate.ColumnCount() - 1]);
1210
+ if (error) {
1211
+ throw InternalException("Error during WAL replay: %s", error.Message());
1210
1212
  }
1211
1213
  }
1212
1214
  }
@@ -36,7 +36,7 @@ void Index::InitializeLock(IndexLock &state) {
36
36
  state.index_lock = unique_lock<mutex>(lock);
37
37
  }
38
38
 
39
- bool Index::Append(DataChunk &entries, Vector &row_identifiers) {
39
+ PreservedError Index::Append(DataChunk &entries, Vector &row_identifiers) {
40
40
  IndexLock state;
41
41
  InitializeLock(state);
42
42
  return Append(state, entries, row_identifiers);
@@ -90,4 +90,15 @@ BlockPointer Index::Serialize(MetaBlockWriter &writer) {
90
90
  throw NotImplementedException("The implementation of this index serialization does not exist.");
91
91
  }
92
92
 
93
+ string Index::AppendRowError(DataChunk &input, idx_t index) {
94
+ string error;
95
+ for (idx_t c = 0; c < input.ColumnCount(); c++) {
96
+ if (c > 0) {
97
+ error += ", ";
98
+ }
99
+ error += input.GetValue(c, index).ToString();
100
+ }
101
+ return error;
102
+ }
103
+
93
104
  } // namespace duckdb
@@ -197,16 +197,16 @@ void LocalTableStorage::FlushToDisk() {
197
197
  optimistic_writer.FinalFlush();
198
198
  }
199
199
 
200
- bool LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, RowGroupCollection &source,
201
- TableIndexList &index_list, const vector<LogicalType> &table_types,
202
- row_t &start_row) {
200
+ PreservedError LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, RowGroupCollection &source,
201
+ TableIndexList &index_list, const vector<LogicalType> &table_types,
202
+ row_t &start_row) {
203
203
  // only need to scan for index append
204
204
  // figure out which columns we need to scan for the set of indexes
205
205
  auto columns = index_list.GetRequiredColumns();
206
206
  // create an empty mock chunk that contains all the correct types for the table
207
207
  DataChunk mock_chunk;
208
208
  mock_chunk.InitializeEmpty(table_types);
209
- bool success = true;
209
+ PreservedError error;
210
210
  source.Scan(transaction, columns, [&](DataChunk &chunk) -> bool {
211
211
  // construct the mock chunk by referencing the required columns
212
212
  for (idx_t i = 0; i < columns.size(); i++) {
@@ -214,28 +214,28 @@ bool LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, RowGroupCo
214
214
  }
215
215
  mock_chunk.SetCardinality(chunk);
216
216
  // append this chunk to the indexes of the table
217
- if (!DataTable::AppendToIndexes(index_list, mock_chunk, start_row)) {
218
- success = false;
217
+ error = DataTable::AppendToIndexes(index_list, mock_chunk, start_row);
218
+ if (error) {
219
219
  return false;
220
220
  }
221
221
  start_row += chunk.size();
222
222
  return true;
223
223
  });
224
- return success;
224
+ return error;
225
225
  }
226
226
 
227
227
  void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppendState &append_state,
228
228
  idx_t append_count, bool append_to_table) {
229
- bool constraint_violated = false;
230
229
  if (append_to_table) {
231
230
  table->InitializeAppend(transaction, append_state, append_count);
232
231
  }
232
+ PreservedError error;
233
233
  if (append_to_table) {
234
234
  // appending: need to scan entire
235
235
  row_groups->Scan(transaction, [&](DataChunk &chunk) -> bool {
236
236
  // append this chunk to the indexes of the table
237
- if (!table->AppendToIndexes(chunk, append_state.current_row)) {
238
- constraint_violated = true;
237
+ error = table->AppendToIndexes(chunk, append_state.current_row);
238
+ if (error) {
239
239
  return false;
240
240
  }
241
241
  // append to base table
@@ -243,11 +243,10 @@ void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppen
243
243
  return true;
244
244
  });
245
245
  } else {
246
- constraint_violated = !AppendToIndexes(transaction, *row_groups, table->info->indexes, table->GetTypes(),
247
- append_state.current_row);
246
+ error = AppendToIndexes(transaction, *row_groups, table->info->indexes, table->GetTypes(),
247
+ append_state.current_row);
248
248
  }
249
- if (constraint_violated) {
250
- PreservedError error;
249
+ if (error) {
251
250
  // need to revert the append
252
251
  row_t current_row = append_state.row_start;
253
252
  // remove the data from the indexes, if there are any indexes
@@ -273,10 +272,7 @@ void LocalTableStorage::AppendToIndexes(DuckTransaction &transaction, TableAppen
273
272
  if (append_to_table) {
274
273
  table->RevertAppendInternal(append_state.row_start, append_count);
275
274
  }
276
- if (error) {
277
- error.Throw();
278
- }
279
- throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
275
+ error.Throw();
280
276
  }
281
277
  }
282
278
 
@@ -412,8 +408,9 @@ void LocalStorage::Append(LocalAppendState &state, DataChunk &chunk) {
412
408
  // append to unique indices (if any)
413
409
  auto storage = state.storage;
414
410
  idx_t base_id = MAX_ROW_ID + storage->row_groups->GetTotalRows() + state.append_state.total_append_count;
415
- if (!DataTable::AppendToIndexes(storage->indexes, chunk, base_id)) {
416
- throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
411
+ auto error = DataTable::AppendToIndexes(storage->indexes, chunk, base_id);
412
+ if (error) {
413
+ error.Throw();
417
414
  }
418
415
 
419
416
  //! Append the chunk to the local storage
@@ -434,9 +431,9 @@ void LocalStorage::LocalMerge(DataTable *table, RowGroupCollection &collection)
434
431
  if (!storage->indexes.Empty()) {
435
432
  // append data to indexes if required
436
433
  row_t base_id = MAX_ROW_ID + storage->row_groups->GetTotalRows();
437
- bool success = storage->AppendToIndexes(transaction, collection, storage->indexes, table->GetTypes(), base_id);
438
- if (!success) {
439
- throw ConstraintException("PRIMARY KEY or UNIQUE constraint violated: duplicated key");
434
+ auto error = storage->AppendToIndexes(transaction, collection, storage->indexes, table->GetTypes(), base_id);
435
+ if (error) {
436
+ error.Throw();
440
437
  }
441
438
  }
442
439
  storage->row_groups->MergeStorage(collection);