duckdb 0.7.2-dev3666.0 → 0.7.2-dev3763.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/package.json +1 -1
  2. package/src/database.cpp +0 -1
  3. package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
  4. package/src/duckdb/extension/json/json_functions/read_json.cpp +4 -4
  5. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +1 -1
  6. package/src/duckdb/extension/json/json_scan.cpp +16 -12
  7. package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
  8. package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
  9. package/src/duckdb/src/common/exception.cpp +17 -0
  10. package/src/duckdb/src/common/exception_format_value.cpp +14 -0
  11. package/src/duckdb/src/common/file_system.cpp +53 -31
  12. package/src/duckdb/src/common/local_file_system.cpp +5 -3
  13. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  14. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  15. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
  16. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  17. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +3 -3
  18. package/src/duckdb/src/common/serializer.cpp +1 -1
  19. package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
  20. package/src/duckdb/src/common/string_util.cpp +6 -2
  21. package/src/duckdb/src/common/types/bit.cpp +2 -2
  22. package/src/duckdb/src/common/types/blob.cpp +2 -2
  23. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  24. package/src/duckdb/src/common/types/date.cpp +1 -1
  25. package/src/duckdb/src/common/types/decimal.cpp +2 -2
  26. package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
  27. package/src/duckdb/src/common/types/time.cpp +1 -1
  28. package/src/duckdb/src/common/types/vector.cpp +7 -7
  29. package/src/duckdb/src/common/types.cpp +1 -1
  30. package/src/duckdb/src/common/windows_util.cpp +2 -2
  31. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +1 -1
  32. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +1 -1
  33. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
  34. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  35. package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
  36. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
  37. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  38. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  39. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  40. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1 -1
  41. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +2 -2
  42. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +91 -30
  43. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
  44. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +1 -1
  45. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
  46. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -3
  47. package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
  48. package/src/duckdb/src/function/pragma/pragma_queries.cpp +2 -2
  49. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -1
  50. package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
  51. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  52. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  53. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  54. package/src/duckdb/src/function/table/read_csv.cpp +3 -0
  55. package/src/duckdb/src/function/table/table_scan.cpp +7 -3
  56. package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -6
  57. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +2 -2
  58. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +26 -0
  59. package/src/duckdb/src/include/duckdb/common/file_system.hpp +6 -0
  60. package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -9
  61. package/src/duckdb/src/include/duckdb/common/http_state.hpp +2 -2
  62. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +10 -10
  66. package/src/duckdb/src/include/duckdb/common/string_util.hpp +20 -0
  67. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
  69. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
  70. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +4 -4
  71. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +8 -8
  72. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +3 -3
  74. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
  75. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
  78. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
  79. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -3
  80. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
  81. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/main/client_data.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
  85. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +1 -1
  86. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +5 -0
  88. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +4 -0
  89. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
  90. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
  91. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +5 -5
  93. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
  94. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -1
  95. package/src/duckdb/src/main/client_context.cpp +4 -4
  96. package/src/duckdb/src/main/db_instance_cache.cpp +5 -3
  97. package/src/duckdb/src/main/extension/extension_install.cpp +22 -18
  98. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
  99. package/src/duckdb/src/parser/expression/collate_expression.cpp +1 -1
  100. package/src/duckdb/src/parser/keyword_helper.cpp +11 -1
  101. package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
  102. package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -2
  103. package/src/duckdb/src/parser/tableref.cpp +1 -1
  104. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +7 -1
  105. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +1 -1
  106. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
  107. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
  108. package/src/duckdb/src/storage/data_table.cpp +75 -44
  109. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  110. package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
  111. package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
  112. package/src/duckdb/src/storage/storage_manager.cpp +3 -0
  113. package/src/duckdb/src/storage/table/row_group.cpp +11 -11
  114. package/src/duckdb/src/storage/table/scan_state.cpp +1 -1
  115. package/src/duckdb/src/storage/table/update_segment.cpp +6 -6
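
Most of the churn in the hunks that follow is a single mechanical rename: the `unsafe_array_ptr<T>` alias becomes `unsafe_unique_array<T>`, and its factory `make_unsafe_array<T>` becomes `make_unsafe_uniq_array<T>` (the alias and factory presumably live in the helper.hpp and unique_ptr.hpp entries above). As a minimal sketch of what the renamed helpers amount to, assuming definitions along the lines of duckdb/common/helper.hpp rather than the verbatim implementation:

#include <cstddef>
#include <memory>

// Hedged sketch: an owning-array alias plus a factory that allocates
// WITHOUT zero-initialization. That is the "unsafe" part, and it is why
// many call sites in this diff still memset() the buffer right after
// allocating it.
template <class T>
using unsafe_unique_array = std::unique_ptr<T[]>;

template <class T>
unsafe_unique_array<T> make_unsafe_uniq_array(std::size_t n) {
	// new T[n] default-initializes; trivial element types such as data_t
	// (a byte) are left uninitialized.
	return unsafe_unique_array<T>(new T[n]);
}

The remaining changes are behavioral: ON CONFLICT handling is extended to transaction-local storage in physical_insert.cpp, identifier and literal quoting moves into StringUtil::Format via the new SQLIdentifier/SQLString wrappers, and read_csv.cpp gains a bounds check.
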
package/src/duckdb/src/common/types/vector.cpp
@@ -297,7 +297,7 @@ void Vector::Resize(idx_t cur_size, idx_t new_size) {
 	}
 	for (auto &data_to_resize : to_resize) {
 		if (!data_to_resize.is_nested) {
-			auto new_data = make_unsafe_array<data_t>(new_size * data_to_resize.type_size);
+			auto new_data = make_unsafe_uniq_array<data_t>(new_size * data_to_resize.type_size);
 			memcpy(new_data.get(), data_to_resize.data, cur_size * data_to_resize.type_size * sizeof(data_t));
 			data_to_resize.buffer->SetData(std::move(new_data));
 			data_to_resize.vec.data = data_to_resize.buffer->GetData();
@@ -920,7 +920,7 @@ void Vector::Serialize(idx_t count, Serializer &serializer) {
 	if (TypeIsConstantSize(type.InternalType())) {
 		// constant size type: simple copy
 		idx_t write_size = GetTypeIdSize(type.InternalType()) * count;
-		auto ptr = make_unsafe_array<data_t>(write_size);
+		auto ptr = make_unsafe_uniq_array<data_t>(write_size);
 		VectorOperations::WriteToStorage(*this, count, ptr.get());
 		serializer.WriteData(ptr.get(), write_size);
 	} else {
@@ -947,7 +947,7 @@ void Vector::Serialize(idx_t count, Serializer &serializer) {
 		auto list_size = ListVector::GetListSize(*this);
 
 		// serialize the list entries in a flat array
-		auto data = make_unsafe_array<list_entry_t>(count);
+		auto data = make_unsafe_uniq_array<list_entry_t>(count);
 		auto source_array = (list_entry_t *)vdata.data;
 		for (idx_t i = 0; i < count; i++) {
 			auto idx = vdata.sel->get_index(i);
@@ -988,7 +988,7 @@ void Vector::FormatSerialize(FormatSerializer &serializer, idx_t count) {
 	if (TypeIsConstantSize(logical_type.InternalType())) {
 		// constant size type: simple copy
 		idx_t write_size = GetTypeIdSize(logical_type.InternalType()) * count;
-		auto ptr = make_unsafe_array<data_t>(write_size);
+		auto ptr = make_unsafe_uniq_array<data_t>(write_size);
 		VectorOperations::WriteToStorage(*this, count, ptr.get());
 		serializer.WriteProperty("data", ptr.get(), write_size);
 	} else {
@@ -1027,7 +1027,7 @@ void Vector::FormatSerialize(FormatSerializer &serializer, idx_t count) {
 		auto list_size = ListVector::GetListSize(*this);
 
 		// serialize the list entries in a flat array
-		auto entries = make_unsafe_array<list_entry_t>(count);
+		auto entries = make_unsafe_uniq_array<list_entry_t>(count);
 		auto source_array = (list_entry_t *)vdata.data;
 		for (idx_t i = 0; i < count; i++) {
 			auto idx = vdata.sel->get_index(i);
@@ -1071,7 +1071,7 @@ void Vector::FormatDeserialize(FormatDeserializer &deserializer, idx_t count) {
 	if (TypeIsConstantSize(logical_type.InternalType())) {
 		// constant size type: read fixed amount of data
 		auto column_size = GetTypeIdSize(logical_type.InternalType()) * count;
-		auto ptr = make_unsafe_array<data_t>(column_size);
+		auto ptr = make_unsafe_uniq_array<data_t>(column_size);
 		deserializer.ReadProperty("data", ptr.get(), column_size);
 
 		VectorOperations::ReadFromStorage(ptr.get(), count, *this);
@@ -1158,7 +1158,7 @@ void Vector::Deserialize(idx_t count, Deserializer &source) {
 	if (TypeIsConstantSize(type.InternalType())) {
 		// constant size type: read fixed amount of data from
 		auto column_size = GetTypeIdSize(type.InternalType()) * count;
-		auto ptr = make_unsafe_array<data_t>(column_size);
+		auto ptr = make_unsafe_uniq_array<data_t>(column_size);
 		source.ReadData(ptr.get(), column_size);
 
 		VectorOperations::ReadFromStorage(ptr.get(), count, *this);

package/src/duckdb/src/common/types.cpp
@@ -349,7 +349,7 @@ string LogicalType::ToString() const {
 		auto &child_types = StructType::GetChildTypes(*this);
 		string ret = "STRUCT(";
 		for (size_t i = 0; i < child_types.size(); i++) {
-			ret += KeywordHelper::WriteOptionallyQuoted(child_types[i].first) + " " + child_types[i].second.ToString();
+			ret += StringUtil::Format("%s %s", SQLIdentifier(child_types[i].first), child_types[i].second);
 			if (i < child_types.size() - 1) {
 				ret += ", ";
 			}
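
SQLIdentifier here (and SQLString in physical_export.cpp further down) are new format-value wrappers, presumably the additions to exception_format_value.hpp (+26) and keyword_helper.cpp (+11) listed above: instead of every call site invoking KeywordHelper::WriteOptionallyQuoted by hand, the quoting decision moves into StringUtil::Format. The names come from this diff; the bodies below are assumptions, sketched only to show the shape:

#include <string>

// Hedged sketch only; the real definitions hook into DuckDB's formatter.
struct SQLIdentifier {
	explicit SQLIdentifier(const std::string &raw_p) : raw(raw_p) {}
	std::string raw; // rendered as "name", double-quoted only when required
};

struct SQLString {
	explicit SQLString(const std::string &raw_p) : raw(raw_p) {}
	std::string raw; // rendered as 'literal', with embedded quotes escaped
};

The payoff is visible in the hunk above: one Format() call replaces a chain of string concatenations, and the quoting rules live in one place.
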

package/src/duckdb/src/common/windows_util.cpp
@@ -11,7 +11,7 @@ std::wstring WindowsUtil::UTF8ToUnicode(const char *input) {
 	if (result_size == 0) {
 		throw IOException("Failure in MultiByteToWideChar");
 	}
-	auto buffer = make_unsafe_array<wchar_t>(result_size);
+	auto buffer = make_unsafe_uniq_array<wchar_t>(result_size);
 	result_size = MultiByteToWideChar(CP_UTF8, 0, input, -1, buffer.get(), result_size);
 	if (result_size == 0) {
 		throw IOException("Failure in MultiByteToWideChar");
@@ -26,7 +26,7 @@ static string WideCharToMultiByteWrapper(LPCWSTR input, uint32_t code_page) {
 	if (result_size == 0) {
 		throw IOException("Failure in WideCharToMultiByte");
 	}
-	auto buffer = make_unsafe_array<char>(result_size);
+	auto buffer = make_unsafe_uniq_array<char>(result_size);
 	result_size = WideCharToMultiByte(code_page, 0, input, -1, buffer.get(), result_size, 0, 0);
 	if (result_size == 0) {
 		throw IOException("Failure in WideCharToMultiByte");

package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp
@@ -179,7 +179,7 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
 
 	// state_buffer holds the state for each list of this chunk
 	idx_t size = aggr.function.state_size();
-	auto state_buffer = make_unsafe_array<data_t>(size * count);
+	auto state_buffer = make_unsafe_uniq_array<data_t>(size * count);
 
 	// state vector for initialize and finalize
 	StateVector state_vector(count, info.aggr_expr->Copy());

package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp
@@ -77,7 +77,7 @@ static void AppendFilteredToResult(Vector &lambda_vector, list_entry_t *result_e
 	}
 
 	// found a true value
-	if (lambda_validity.RowIsValid(entry) && lambda_values[entry] > 0) {
+	if (lambda_validity.RowIsValid(entry) && lambda_values[entry]) {
 		true_sel.set_index(true_count++, i);
 		curr_list_len++;
 	}

package/src/duckdb/src/core_functions/scalar/string/printf.cpp
@@ -94,7 +94,7 @@ static void PrintfFunction(DataChunk &args, ExpressionState &state, Vector &resu
 
 	// now gather all the format arguments
 	vector<duckdb_fmt::basic_format_arg<CTX>> format_args;
-	vector<unsafe_array_ptr<data_t>> string_args;
+	vector<unsafe_unique_array<data_t>> string_args;
 
 	for (idx_t col_idx = 1; col_idx < args.ColumnCount(); col_idx++) {
 		auto &col = args.data[col_idx];

package/src/duckdb/src/execution/aggregate_hashtable.cpp
@@ -390,7 +390,7 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendSta
 	}
 	TupleDataCollection::ToUnifiedFormat(state.chunk_state, state.group_chunk);
 	if (!state.group_data) {
-		state.group_data = make_unsafe_array<UnifiedVectorFormat>(state.group_chunk.ColumnCount());
+		state.group_data = make_unsafe_uniq_array<UnifiedVectorFormat>(state.group_chunk.ColumnCount());
 	}
 	TupleDataCollection::GetVectorData(state.chunk_state, state.group_data.get());
 

package/src/duckdb/src/execution/join_hashtable.cpp
@@ -142,7 +142,7 @@ static idx_t FilterNullValues(UnifiedVectorFormat &vdata, const SelectionVector
 	return result_count;
 }
 
-idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unsafe_array_ptr<UnifiedVectorFormat> &key_data,
+idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unsafe_unique_array<UnifiedVectorFormat> &key_data,
                                  const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
 	key_data = keys.ToUnifiedFormat();
 
@@ -197,7 +197,7 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
 	}
 
 	// prepare the keys for processing
-	unsafe_array_ptr<UnifiedVectorFormat> key_data;
+	unsafe_unique_array<UnifiedVectorFormat> key_data;
 	const SelectionVector *current_sel;
 	SelectionVector sel(STANDARD_VECTOR_SIZE);
 	idx_t added_count = PrepareKeys(keys, key_data, current_sel, sel, true);
@@ -332,7 +332,7 @@ unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys
 	auto ss = make_uniq<ScanStructure>(*this);
 
 	if (join_type != JoinType::INNER) {
-		ss->found_match = make_unsafe_array<bool>(STANDARD_VECTOR_SIZE);
+		ss->found_match = make_unsafe_uniq_array<bool>(STANDARD_VECTOR_SIZE);
 		memset(ss->found_match.get(), 0, sizeof(bool) * STANDARD_VECTOR_SIZE);
 	}
 

package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp
@@ -39,7 +39,7 @@ struct AggregateState {
 		for (auto &aggregate : aggregate_expressions) {
 			D_ASSERT(aggregate->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
 			auto &aggr = aggregate->Cast<BoundAggregateExpression>();
-			auto state = make_unsafe_array<data_t>(aggr.function.state_size());
+			auto state = make_unsafe_uniq_array<data_t>(aggr.function.state_size());
 			aggr.function.initialize(state.get());
 			aggregates.push_back(std::move(state));
 			bind_data.push_back(aggr.bind_info.get());
@@ -69,7 +69,7 @@ struct AggregateState {
 	}
 
 	//! The aggregate values
-	vector<unsafe_array_ptr<data_t>> aggregates;
+	vector<unsafe_unique_array<data_t>> aggregates;
 	//! The bind data
 	vector<FunctionData *> bind_data;
 	//! The destructors

package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp
@@ -10,7 +10,7 @@ void OuterJoinMarker::Initialize(idx_t count_p) {
 		return;
 	}
 	this->count = count_p;
-	found_match = make_unsafe_array<bool>(count);
+	found_match = make_unsafe_uniq_array<bool>(count);
 	Reset();
 }
 

package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp
@@ -25,7 +25,7 @@ bool PerfectHashJoinExecutor::BuildPerfectHashTable(LogicalType &key_type) {
 	}
 
 	// and for duplicate_checking
-	bitmap_build_idx = make_unsafe_array<bool>(build_size);
+	bitmap_build_idx = make_unsafe_uniq_array<bool>(build_size);
 	memset(bitmap_build_idx.get(), 0, sizeof(bool) * build_size); // set false
 
 	// Now fill columns with build data

package/src/duckdb/src/execution/operator/join/physical_range_join.cpp
@@ -72,7 +72,7 @@ void PhysicalRangeJoin::GlobalSortedTable::Combine(LocalSortedTable &ltable) {
 }
 
 void PhysicalRangeJoin::GlobalSortedTable::IntializeMatches() {
-	found_match = make_unsafe_array<bool>(Count());
+	found_match = make_unsafe_uniq_array<bool>(Count());
 	memset(found_match.get(), 0, sizeof(bool) * Count());
 }
 

package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp
@@ -1425,7 +1425,7 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start, idx_t &line_start) {
 		                    GetLineNumberStr(linenr, linenr_estimated));
 	}
 
-	buffer = make_unsafe_array<char>(buffer_read_size + remaining + 1);
+	buffer = make_unsafe_uniq_array<char>(buffer_read_size + remaining + 1);
 	buffer_size = remaining + buffer_read_size;
 	if (remaining > 0) {
 		// remaining from last buffer: copy it here

package/src/duckdb/src/execution/operator/persistent/physical_export.cpp
@@ -51,8 +51,8 @@ static void WriteCopyStatement(FileSystem &fs, stringstream &ss, CopyInfo &info,
 		ss << KeywordHelper::WriteOptionallyQuoted(exported_table.schema_name) << ".";
 	}
 
-	ss << KeywordHelper::WriteOptionallyQuoted(exported_table.table_name) << " FROM '" << exported_table.file_path
-	   << "' (";
+	ss << StringUtil::Format("%s FROM %s (", SQLIdentifier(exported_table.table_name),
+	                         SQLString(exported_table.file_path));
 
 	// write the copy options
 	ss << "FORMAT '" << info.format << "'";

package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp
@@ -101,7 +101,9 @@ public:
 	unique_ptr<RowGroupCollection> local_collection;
 	optional_ptr<OptimisticDataWriter> writer;
 	// Rows that have been updated by a DO UPDATE conflict
-	unordered_set<row_t> updated_rows;
+	unordered_set<row_t> updated_global_rows;
+	// Rows in the transaction-local storage that have been updated by a DO UPDATE conflict
+	unordered_set<row_t> updated_local_rows;
 	idx_t update_count = 0;
 };
 
@@ -177,8 +179,11 @@ void CheckOnConflictCondition(ExecutionContext &context, DataChunk &conflicts, c
 	result.SetCardinality(conflicts.size());
 }
 
-void PhysicalInsert::CombineExistingAndInsertTuples(DataChunk &result, DataChunk &scan_chunk, DataChunk &input_chunk,
-                                                    ClientContext &client) const {
+static void CombineExistingAndInsertTuples(DataChunk &result, DataChunk &scan_chunk, DataChunk &input_chunk,
+                                           ClientContext &client, const PhysicalInsert &op) {
+	auto &types_to_fetch = op.types_to_fetch;
+	auto &insert_types = op.insert_types;
+
 	if (types_to_fetch.empty()) {
 		// We have not scanned the initial table, so we can just duplicate the initial chunk
 		result.Initialize(client, input_chunk.GetTypes());
@@ -218,14 +223,12 @@ void PhysicalInsert::CombineExistingAndInsertTuples(DataChunk &result, DataChunk
 	result.SetCardinality(input_chunk.size());
 }
 
-idx_t PhysicalInsert::PerformOnConflictAction(ExecutionContext &context, DataChunk &chunk, TableCatalogEntry &table,
-                                              Vector &row_ids) const {
-	if (action_type == OnConflictAction::NOTHING) {
-		return 0;
-	}
-
-	DataChunk update_chunk; // contains only the to-update columns
+static void CreateUpdateChunk(ExecutionContext &context, DataChunk &chunk, TableCatalogEntry &table, Vector &row_ids,
+                              DataChunk &update_chunk, const PhysicalInsert &op) {
 
+	auto &do_update_condition = op.do_update_condition;
+	auto &set_types = op.set_types;
+	auto &set_expressions = op.set_expressions;
 	// Check the optional condition for the DO UPDATE clause, to filter which rows will be updated
 	if (do_update_condition) {
 		DataChunk do_update_filter_result;
@@ -256,19 +259,43 @@ idx_t PhysicalInsert::PerformOnConflictAction(ExecutionContext &context, DataChu
 	ExpressionExecutor executor(context.client, set_expressions);
 	executor.Execute(chunk, update_chunk);
 	update_chunk.SetCardinality(chunk);
+}
+
+template <bool GLOBAL>
+static idx_t PerformOnConflictAction(ExecutionContext &context, DataChunk &chunk, TableCatalogEntry &table,
+                                     Vector &row_ids, const PhysicalInsert &op) {
+
+	if (op.action_type == OnConflictAction::NOTHING) {
+		return 0;
+	}
+	auto &set_columns = op.set_columns;
+
+	DataChunk update_chunk;
+	CreateUpdateChunk(context, chunk, table, row_ids, update_chunk, op);
 
 	auto &data_table = table.GetStorage();
 	// Perform the update, using the results of the SET expressions
-	data_table.Update(table, context.client, row_ids, set_columns, update_chunk);
+	if (GLOBAL) {
+		data_table.Update(table, context.client, row_ids, set_columns, update_chunk);
+	} else {
+		auto &local_storage = LocalStorage::Get(context.client, data_table.db);
+		// Perform the update, using the results of the SET expressions
+		local_storage.Update(data_table, row_ids, set_columns, update_chunk);
+	}
 	return update_chunk.size();
 }
 
 // TODO: should we use a hash table to keep track of this instead?
-void PhysicalInsert::RegisterUpdatedRows(InsertLocalState &lstate, const Vector &row_ids, idx_t count) const {
+template <bool GLOBAL>
+static void RegisterUpdatedRows(InsertLocalState &lstate, const Vector &row_ids, idx_t count) {
 	// Insert all rows, if any of the rows has already been updated before, we throw an error
 	auto data = FlatVector::GetData<row_t>(row_ids);
+
+	// The rowids in the transaction-local ART aren't final yet so we have to separately keep track of the two sets of
+	// rowids
+	unordered_set<row_t> &updated_rows = GLOBAL ? lstate.updated_global_rows : lstate.updated_local_rows;
 	for (idx_t i = 0; i < count; i++) {
-		auto result = lstate.updated_rows.insert(data[i]);
+		auto result = updated_rows.insert(data[i]);
 		if (result.second == false) {
 			throw InvalidInputException(
 			    "ON CONFLICT DO UPDATE can not update the same row twice in the same command, Ensure that no rows "
@@ -277,20 +304,25 @@ void PhysicalInsert::RegisterUpdatedRows(InsertLocalState &lstate, const Vector
 	}
 }
 
-idx_t PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionContext &context,
-                                         InsertLocalState &lstate) const {
-	auto &data_table = table.GetStorage();
-	if (action_type == OnConflictAction::THROW) {
-		data_table.VerifyAppendConstraints(table, context.client, lstate.insert_chunk, nullptr);
-		return 0;
-	}
-	// Check whether any conflicts arise, and if they all meet the conflict_target + condition
-	// If that's not the case - We throw the first error
+template <bool GLOBAL>
+static idx_t HandleInsertConflicts(TableCatalogEntry &table, ExecutionContext &context, InsertLocalState &lstate,
+                                   DataTable &data_table, const PhysicalInsert &op) {
+	auto &types_to_fetch = op.types_to_fetch;
+	auto &on_conflict_condition = op.on_conflict_condition;
+	auto &conflict_target = op.conflict_target;
+	auto &columns_to_fetch = op.columns_to_fetch;
+
+	auto &local_storage = LocalStorage::Get(context.client, data_table.db);
 
 	// We either want to do nothing, or perform an update when conflicts arise
 	ConflictInfo conflict_info(conflict_target);
 	ConflictManager conflict_manager(VerifyExistenceType::APPEND, lstate.insert_chunk.size(), &conflict_info);
-	data_table.VerifyAppendConstraints(table, context.client, lstate.insert_chunk, &conflict_manager);
+	if (GLOBAL) {
+		data_table.VerifyAppendConstraints(table, context.client, lstate.insert_chunk, &conflict_manager);
+	} else {
+		DataTable::VerifyUniqueIndexes(local_storage.GetIndexes(data_table), context.client, lstate.insert_chunk,
+		                               &conflict_manager);
+	}
 	conflict_manager.Finalize();
 	if (conflict_manager.ConflictCount() == 0) {
 		// No conflicts found, 0 updates performed
@@ -309,18 +341,25 @@ idx_t PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionCont
 	conflict_chunk.Slice(conflicts.Selection(), conflicts.Count());
 	conflict_chunk.SetCardinality(conflicts.Count());
 
+	// Holds the pins for the fetched rows
+	unique_ptr<ColumnFetchState> fetch_state;
 	if (!types_to_fetch.empty()) {
 		D_ASSERT(scan_chunk.size() == 0);
 		// When these values are required for the conditions or the SET expressions,
 		// then we scan the existing table for the conflicting tuples, using the rowids
 		scan_chunk.Initialize(context.client, types_to_fetch);
-		auto fetch_state = make_uniq<ColumnFetchState>();
-		auto &transaction = DuckTransaction::Get(context.client, table.catalog);
-		data_table.Fetch(transaction, scan_chunk, columns_to_fetch, row_ids, conflicts.Count(), *fetch_state);
+		fetch_state = make_uniq<ColumnFetchState>();
+		if (GLOBAL) {
+			auto &transaction = DuckTransaction::Get(context.client, table.catalog);
+			data_table.Fetch(transaction, scan_chunk, columns_to_fetch, row_ids, conflicts.Count(), *fetch_state);
+		} else {
+			local_storage.FetchChunk(data_table, row_ids, conflicts.Count(), columns_to_fetch, scan_chunk,
+			                         *fetch_state);
		}
 	}
 
 	// Splice the Input chunk and the fetched chunk together
-	CombineExistingAndInsertTuples(combined_chunk, scan_chunk, conflict_chunk, context.client);
+	CombineExistingAndInsertTuples(combined_chunk, scan_chunk, conflict_chunk, context.client, op);
 
 	if (on_conflict_condition) {
 		DataChunk conflict_condition_result;
@@ -338,14 +377,19 @@ idx_t PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionCont
 		}
 		combined_chunk.Slice(sel.Selection(), sel.Count());
 		row_ids.Slice(sel.Selection(), sel.Count());
-		data_table.VerifyAppendConstraints(table, context.client, combined_chunk, nullptr);
+		if (GLOBAL) {
+			data_table.VerifyAppendConstraints(table, context.client, combined_chunk, nullptr);
+		} else {
+			DataTable::VerifyUniqueIndexes(local_storage.GetIndexes(data_table), context.client,
+			                               lstate.insert_chunk, nullptr);
+		}
 		throw InternalException("The previous operation was expected to throw but didn't");
 		}
 	}
 
-	RegisterUpdatedRows(lstate, row_ids, combined_chunk.size());
+	RegisterUpdatedRows<GLOBAL>(lstate, row_ids, combined_chunk.size());
 
-	idx_t updated_tuples = PerformOnConflictAction(context, combined_chunk, table, row_ids);
+	idx_t updated_tuples = PerformOnConflictAction<GLOBAL>(context, combined_chunk, table, row_ids, op);
 
 	// Remove the conflicting tuples from the insert chunk
 	SelectionVector sel_vec(lstate.insert_chunk.size());
@@ -356,6 +400,23 @@ idx_t PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionCont
 	return updated_tuples;
 }
 
+idx_t PhysicalInsert::OnConflictHandling(TableCatalogEntry &table, ExecutionContext &context,
+                                         InsertLocalState &lstate) const {
+	auto &data_table = table.GetStorage();
+	if (action_type == OnConflictAction::THROW) {
+		data_table.VerifyAppendConstraints(table, context.client, lstate.insert_chunk, nullptr);
+		return 0;
+	}
+	// Check whether any conflicts arise, and if they all meet the conflict_target + condition
+	// If that's not the case - We throw the first error
+	idx_t updated_tuples = 0;
+	updated_tuples += HandleInsertConflicts<true>(table, context, lstate, data_table, *this);
+	// Also check the transaction-local storage+ART so we can detect conflicts within this transaction
+	updated_tuples += HandleInsertConflicts<false>(table, context, lstate, data_table, *this);
+
+	return updated_tuples;
+}
+
 SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
 	auto &gstate = input.global_state.Cast<InsertGlobalState>();
 	auto &lstate = input.local_state.Cast<InsertLocalState>();
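
The physical_insert.cpp rewrite follows one pattern throughout: member functions that used to consult only the global DataTable become free template <bool GLOBAL> functions taking const PhysicalInsert &op, so identical conflict-handling logic runs twice, once against committed storage and once against the transaction-local storage and its ART indexes. The two rowid sets (updated_global_rows vs. updated_local_rows) exist because, per the comment in the diff, rowids in the transaction-local ART are not final yet. A minimal, self-contained sketch of the compile-time dispatch, using stand-in types rather than DuckDB's:

#include <cstdio>

// Stand-ins for DataTable::VerifyAppendConstraints and
// DataTable::VerifyUniqueIndexes over LocalStorage, respectively.
struct GlobalTable {
	static void Verify() { std::puts("verify against committed rows"); }
};
struct LocalTable {
	static void Verify() { std::puts("verify against transaction-local ART"); }
};

// GLOBAL is a compile-time flag: both branches are type-checked once,
// but each instantiation folds the constant condition away, so the
// per-chunk Sink path pays for no runtime dispatch.
template <bool GLOBAL>
static void HandleConflicts() {
	if (GLOBAL) {
		GlobalTable::Verify();
	} else {
		LocalTable::Verify();
	}
}

int main() {
	HandleConflicts<true>();  // pass 1, as in HandleInsertConflicts<true>
	HandleConflicts<false>(); // pass 2, as in HandleInsertConflicts<false>
	return 0;
}
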

package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp
@@ -19,7 +19,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
 	for (auto &aggr_expr : bound_pivot.aggregates) {
 		auto &aggr = (BoundAggregateExpression &)*aggr_expr;
 		// for each aggregate, initialize an empty aggregate state and finalize it immediately
-		auto state = make_unsafe_array<data_t>(aggr.function.state_size());
+		auto state = make_unsafe_uniq_array<data_t>(aggr.function.state_size());
 		aggr.function.initialize(state.get());
 		Vector state_vector(Value::POINTER((uintptr_t)state.get()));
 		Vector result_vector(aggr_expr->return_type);

package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp
@@ -17,7 +17,6 @@ PhysicalCreateIndex::PhysicalCreateIndex(LogicalOperator &op, TableCatalogEntry
     : PhysicalOperator(PhysicalOperatorType::CREATE_INDEX, op.types, estimated_cardinality),
      table(table_p.Cast<DuckTableEntry>()), info(std::move(info)),
      unbound_expressions(std::move(unbound_expressions)) {
-	D_ASSERT(table_p.IsDuckTable());
 	// convert virtual column ids to storage column ids
 	for (auto &column_id : column_ids) {
 		storage_ids.push_back(table.GetColumns().LogicalToPhysical(LogicalIndex(column_id)).index);
@@ -136,6 +135,7 @@ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event,
 	auto &schema = table.schema;
 	auto index_entry = schema.CreateIndex(context, *info, table).get();
 	if (!index_entry) {
+		D_ASSERT(info->on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT);
 		// index already exists, but error ignored because of IF NOT EXISTS
 		return SinkFinalizeType::READY;
 	}

package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp
@@ -23,11 +23,11 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(ClientContext &context, All
 	tuple_size = layout.GetRowWidth();
 
 	// allocate and null initialize the data
-	owned_data = make_unsafe_array<data_t>(tuple_size * total_groups);
+	owned_data = make_unsafe_uniq_array<data_t>(tuple_size * total_groups);
 	data = owned_data.get();
 
 	// set up the empty payloads for every tuple, and initialize the "occupied" flag to false
-	group_is_set = make_unsafe_array<bool>(total_groups);
+	group_is_set = make_unsafe_uniq_array<bool>(total_groups);
 	memset(group_is_set.get(), 0, total_groups * sizeof(bool));
 
 	// initialize the hash table for each entry

package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp
@@ -334,7 +334,7 @@ public:
 	//! The current position to scan the HT for output tuples
 	idx_t ht_index;
 	//! The set of aggregate scan states
-	unsafe_array_ptr<TupleDataParallelScanState> ht_scan_states;
+	unsafe_unique_array<TupleDataParallelScanState> ht_scan_states;
 	atomic<bool> initialized;
 	atomic<bool> finished;
 };
@@ -404,7 +404,7 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
 	for (idx_t i = 0; i < op.aggregates.size(); i++) {
 		D_ASSERT(op.aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
 		auto &aggr = op.aggregates[i]->Cast<BoundAggregateExpression>();
-		auto aggr_state = make_unsafe_array<data_t>(aggr.function.state_size());
+		auto aggr_state = make_unsafe_uniq_array<data_t>(aggr.function.state_size());
 		aggr.function.initialize(aggr_state.get());
 
 		AggregateInputData aggr_input_data(aggr.bind_info.get(), Allocator::DefaultAllocator());
@@ -433,7 +433,7 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
 		lock_guard<mutex> l(state.lock);
 		if (!state.initialized) {
 			auto &finalized_hts = gstate.finalized_hts;
-			state.ht_scan_states = make_unsafe_array<TupleDataParallelScanState>(finalized_hts.size());
+			state.ht_scan_states = make_unsafe_uniq_array<TupleDataParallelScanState>(finalized_hts.size());
 
 			const auto &layout = gstate.finalized_hts[0]->GetDataCollection().GetLayout();
 			vector<column_t> column_ids;

package/src/duckdb/src/execution/window_segment_tree.cpp
@@ -309,7 +309,7 @@ void WindowSegmentTree::ConstructTree() {
 		level_nodes = (level_nodes + (TREE_FANOUT - 1)) / TREE_FANOUT;
 		internal_nodes += level_nodes;
 	} while (level_nodes > 1);
-	levels_flat_native = make_unsafe_array<data_t>(internal_nodes * state.size());
+	levels_flat_native = make_unsafe_uniq_array<data_t>(internal_nodes * state.size());
 	levels_flat_start.push_back(0);
 
 	idx_t levels_flat_offset = 0;

package/src/duckdb/src/function/pragma/pragma_queries.cpp
@@ -41,7 +41,7 @@ string PragmaShowTables(ClientContext &context, const FunctionParameters &parame
 	)
 	SELECT "name"
 	FROM db_objects
-	ORDER BY "name";)EOF", where_clause, where_clause, where_clause);
+	ORDER BY "name";)EOF", where_clause, where_clause);
 	// clang-format on
 
 	return pragma_query;
@@ -141,7 +141,7 @@ string PragmaImportDatabase(ClientContext &context, const FunctionParameters &pa
 		auto handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
 		                          FileSystem::DEFAULT_COMPRESSION);
 		auto fsize = fs.GetFileSize(*handle);
-		auto buffer = make_unsafe_array<char>(fsize);
+		auto buffer = make_unsafe_uniq_array<char>(fsize);
 		fs.Read(*handle, buffer.get(), fsize);
 		auto query = string(buffer.get(), fsize);
 		// Replace the placeholder with the path provided to IMPORT

package/src/duckdb/src/function/scalar/strftime_format.cpp
@@ -408,7 +408,7 @@ string StrfTimeFormat::Format(timestamp_t timestamp, const string &format_str) {
 	auto time = Timestamp::GetTime(timestamp);
 
 	auto len = format.GetLength(date, time, 0, nullptr);
-	auto result = make_unsafe_array<char>(len);
+	auto result = make_unsafe_uniq_array<char>(len);
 	format.FormatString(date, time, result.get());
 	return string(result.get(), len);
 }

package/src/duckdb/src/function/scalar/string/concat.cpp
@@ -118,7 +118,7 @@ static void TemplatedConcatWS(DataChunk &args, string_t *sep_data, const Selecti
                               const SelectionVector &rsel, idx_t count, Vector &result) {
 	vector<idx_t> result_lengths(args.size(), 0);
 	vector<bool> has_results(args.size(), false);
-	auto orrified_data = make_unsafe_array<UnifiedVectorFormat>(args.ColumnCount() - 1);
+	auto orrified_data = make_unsafe_uniq_array<UnifiedVectorFormat>(args.ColumnCount() - 1);
 	for (idx_t col_idx = 1; col_idx < args.ColumnCount(); col_idx++) {
 		args.data[col_idx].ToUnifiedFormat(args.size(), orrified_data[col_idx - 1]);
 	}

package/src/duckdb/src/function/scalar/string/like.cpp
@@ -395,11 +395,11 @@ bool ILikeOperatorFunction(string_t &str, string_t &pattern, char escape = '\0')
 
 	// lowercase both the str and the pattern
 	idx_t str_llength = LowerFun::LowerLength(str_data, str_size);
-	auto str_ldata = make_unsafe_array<char>(str_llength);
+	auto str_ldata = make_unsafe_uniq_array<char>(str_llength);
 	LowerFun::LowerCase(str_data, str_size, str_ldata.get());
 
 	idx_t pat_llength = LowerFun::LowerLength(pat_data, pat_size);
-	auto pat_ldata = make_unsafe_array<char>(pat_llength);
+	auto pat_ldata = make_unsafe_uniq_array<char>(pat_llength);
 	LowerFun::LowerCase(pat_data, pat_size, pat_ldata.get());
 	string_t str_lcase(str_ldata.get(), str_llength);
 	string_t pat_lcase(pat_ldata.get(), pat_llength);

package/src/duckdb/src/function/scalar/system/aggregate_export.cpp
@@ -36,12 +36,12 @@ struct ExportAggregateBindData : public FunctionData {
 struct CombineState : public FunctionLocalState {
 	idx_t state_size;
 
-	unsafe_array_ptr<data_t> state_buffer0, state_buffer1;
+	unsafe_unique_array<data_t> state_buffer0, state_buffer1;
 	Vector state_vector0, state_vector1;
 
 	explicit CombineState(idx_t state_size_p)
-	    : state_size(state_size_p), state_buffer0(make_unsafe_array<data_t>(state_size_p)),
-	      state_buffer1(make_unsafe_array<data_t>(state_size_p)),
+	    : state_size(state_size_p), state_buffer0(make_unsafe_uniq_array<data_t>(state_size_p)),
+	      state_buffer1(make_unsafe_uniq_array<data_t>(state_size_p)),
 	      state_vector0(Value::POINTER((uintptr_t)state_buffer0.get())),
 	      state_vector1(Value::POINTER((uintptr_t)state_buffer1.get())) {
 	}
@@ -55,12 +55,12 @@ static unique_ptr<FunctionLocalState> InitCombineState(ExpressionState &state, c
 
 struct FinalizeState : public FunctionLocalState {
 	idx_t state_size;
-	unsafe_array_ptr<data_t> state_buffer;
+	unsafe_unique_array<data_t> state_buffer;
 	Vector addresses;
 
 	explicit FinalizeState(idx_t state_size_p)
 	    : state_size(state_size_p),
-	      state_buffer(make_unsafe_array<data_t>(STANDARD_VECTOR_SIZE * AlignValue(state_size_p))),
+	      state_buffer(make_unsafe_uniq_array<data_t>(STANDARD_VECTOR_SIZE * AlignValue(state_size_p))),
 	      addresses(LogicalType::POINTER) {
 	}
 };

package/src/duckdb/src/function/table/copy_csv.cpp
@@ -88,7 +88,7 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &i
 	bind_data->is_simple = bind_data->options.delimiter.size() == 1 && bind_data->options.escape.size() == 1 &&
 	                       bind_data->options.quote.size() == 1;
 	if (bind_data->is_simple) {
-		bind_data->requires_quotes = make_unsafe_array<bool>(256);
+		bind_data->requires_quotes = make_unsafe_uniq_array<bool>(256);
 		memset(bind_data->requires_quotes.get(), 0, sizeof(bool) * 256);
 		bind_data->requires_quotes['\n'] = true;
 		bind_data->requires_quotes['\r'] = true;

package/src/duckdb/src/function/table/read_csv.cpp
@@ -586,6 +586,9 @@ bool LineInfo::CanItGetLine(idx_t file_idx, idx_t batch_idx) {
 	if (current_batches.empty() || done) {
 		return true;
 	}
+	if (file_idx >= current_batches.size() || current_batches[file_idx].empty()) {
+		return true;
+	}
 	auto min_value = *current_batches[file_idx].begin();
 	if (min_value >= batch_idx) {
 		return true;
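
The new guard in CanItGetLine is not cosmetic: without it, `current_batches[file_idx]` with an out-of-range index, or `*current_batches[file_idx].begin()` on an empty set, is undefined behavior. A standalone illustration of the hazard the check closes (std::vector and std::set are stand-ins; the actual container types in LineInfo are not shown in this diff):

#include <cstddef>
#include <set>
#include <vector>

int main() {
	std::vector<std::set<unsigned>> current_batches(1);
	std::size_t file_idx = 2; // a file index no batches were ever registered for

	// Mirrors the early-out added to LineInfo::CanItGetLine: both the
	// out-of-range operator[] and dereferencing begin() of an empty set
	// would otherwise be undefined behavior.
	if (file_idx >= current_batches.size() || current_batches[file_idx].empty()) {
		return 0;
	}
	auto min_value = *current_batches[file_idx].begin();
	return static_cast<int>(min_value);
}
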