duckdb 0.7.2-dev3402.0 → 0.7.2-dev3515.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/package.json +2 -2
  2. package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
  3. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  4. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  5. package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
  6. package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
  7. package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
  8. package/src/duckdb/src/common/file_system.cpp +2 -2
  9. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  10. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  11. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
  12. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  13. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +4 -3
  14. package/src/duckdb/src/common/serializer.cpp +1 -1
  15. package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
  16. package/src/duckdb/src/common/string_util.cpp +2 -2
  17. package/src/duckdb/src/common/types/bit.cpp +2 -2
  18. package/src/duckdb/src/common/types/blob.cpp +2 -2
  19. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  20. package/src/duckdb/src/common/types/date.cpp +1 -1
  21. package/src/duckdb/src/common/types/decimal.cpp +2 -2
  22. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +14 -2
  23. package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
  24. package/src/duckdb/src/common/types/time.cpp +1 -1
  25. package/src/duckdb/src/common/types/vector.cpp +7 -7
  26. package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
  27. package/src/duckdb/src/common/windows_util.cpp +2 -2
  28. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +6 -3
  29. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -5
  30. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +10 -7
  31. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
  32. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  33. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +3 -0
  34. package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
  35. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
  36. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  37. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  38. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -0
  39. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  40. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -7
  41. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +4 -41
  42. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
  43. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
  44. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
  45. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -4
  46. package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
  47. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
  48. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -1
  49. package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
  50. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  51. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  52. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  53. package/src/duckdb/src/function/table/read_csv.cpp +43 -35
  54. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  55. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  56. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -3
  57. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -5
  58. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -9
  59. package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +15 -0
  60. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +1 -0
  61. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +3 -2
  64. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -3
  65. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +11 -6
  66. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -1
  67. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
  69. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +4 -4
  70. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +53 -22
  71. package/src/duckdb/src/include/duckdb/common/vector.hpp +5 -2
  72. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +4 -4
  74. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
  75. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +0 -2
  78. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
  79. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +27 -127
  80. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
  81. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
  82. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +2 -4
  84. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
  85. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +5 -0
  86. package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -2
  87. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -2
  88. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +9 -2
  89. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
  90. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
  91. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
  93. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -0
  94. package/src/duckdb/src/main/client_context.cpp +1 -0
  95. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -1
  96. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +25 -1
  97. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
  98. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +14 -6
  99. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -1
  100. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +10 -0
  101. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +64 -42
  102. package/src/duckdb/src/storage/arena_allocator.cpp +1 -1
  103. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -11
  104. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
  105. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
  106. package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
  107. package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
  108. package/src/duckdb/src/storage/table/row_group.cpp +2 -2
  109. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -25
  110. package/src/duckdb/src/storage/table/update_segment.cpp +7 -6
  111. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -2
  112. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +9 -0
  113. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +13 -12
  114. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  115. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +13050 -12885
  116. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  117. package/src/statement.cpp +15 -13
@@ -4,7 +4,6 @@
4
4
  #include "duckdb/main/database.hpp"
5
5
  #include "duckdb/common/string_util.hpp"
6
6
  #include "duckdb/common/enum_util.hpp"
7
- #include "duckdb/common/hive_partitioning.hpp"
8
7
  #include "duckdb/common/union_by_name.hpp"
9
8
  #include "duckdb/main/config.hpp"
10
9
  #include "duckdb/parser/expression/constant_expression.hpp"
@@ -15,7 +14,6 @@
15
14
  #include "duckdb/common/multi_file_reader.hpp"
16
15
  #include "duckdb/main/client_data.hpp"
17
16
  #include "duckdb/execution/operator/persistent/csv_line_info.hpp"
18
-
19
17
  #include <limits>
20
18
 
21
19
  namespace duckdb {
@@ -23,11 +21,8 @@ namespace duckdb {
23
21
  unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const string &file_path, FileCompressionType compression,
24
22
  ClientContext &context) {
25
23
  auto &fs = FileSystem::GetFileSystem(context);
26
- auto file_handle = fs.OpenFile(file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
27
- if (file_handle->CanSeek()) {
28
- file_handle->Reset();
29
- }
30
- return make_uniq<CSVFileHandle>(std::move(file_handle), false);
24
+ auto &allocator = BufferAllocator::Get(context);
25
+ return CSVFileHandle::OpenFile(fs, allocator, file_path, compression, false);
31
26
  }
32
27
 
33
28
  void ReadCSVData::FinalizeRead(ClientContext &context) {
@@ -238,14 +233,6 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
238
233
  } else {
239
234
  result->reader_bind = MultiFileReader::BindOptions(options.file_options, result->files, return_types, names);
240
235
  }
241
- auto &fs = FileSystem::GetFileSystem(context);
242
- for (auto &file : result->files) {
243
- if (fs.IsPipe(file)) {
244
- result->is_pipe = true;
245
- result->single_threaded = true;
246
- break;
247
- }
248
- }
249
236
  result->return_types = return_types;
250
237
  result->return_names = names;
251
238
  result->FinalizeRead(context);
@@ -265,7 +252,7 @@ static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFun
265
252
  struct ParallelCSVGlobalState : public GlobalTableFunctionState {
266
253
  public:
267
254
  ParallelCSVGlobalState(ClientContext &context, unique_ptr<CSVFileHandle> file_handle_p,
268
- vector<string> &files_path_p, idx_t system_threads_p, idx_t buffer_size_p,
255
+ const vector<string> &files_path_p, idx_t system_threads_p, idx_t buffer_size_p,
269
256
  idx_t rows_to_skip, bool force_parallelism_p, vector<column_t> column_ids_p, bool has_header)
270
257
  : file_handle(std::move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p),
271
258
  force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)),
@@ -278,6 +265,7 @@ public:
278
265
  }
279
266
  file_size = file_handle->FileSize();
280
267
  first_file_size = file_size;
268
+ on_disk_file = file_handle->OnDiskFile();
281
269
  bytes_read = 0;
282
270
  if (buffer_size < file_size || file_size == 0) {
283
271
  bytes_per_local_state = buffer_size / ParallelCSVGlobalState::MaxThreads();
@@ -335,7 +323,7 @@ public:
335
323
 
336
324
  bool Finished();
337
325
 
338
- double GetProgress(ReadCSVData &bind_data) const {
326
+ double GetProgress(const ReadCSVData &bind_data) const {
339
327
  idx_t total_files = bind_data.files.size();
340
328
 
341
329
  // get the progress WITHIN the current file
@@ -369,6 +357,8 @@ private:
369
357
  idx_t bytes_per_local_state;
370
358
  //! Size of first file
371
359
  idx_t first_file_size;
360
+ //! Whether or not this is an on-disk file
361
+ bool on_disk_file = true;
372
362
  //! Basically max number of threads in DuckDB
373
363
  idx_t system_threads;
374
364
  //! Size of the buffers
@@ -402,7 +392,7 @@ private:
402
392
  };
403
393
 
404
394
  idx_t ParallelCSVGlobalState::MaxThreads() const {
405
- if (force_parallelism) {
395
+ if (force_parallelism || !on_disk_file) {
406
396
  return system_threads;
407
397
  }
408
398
  idx_t one_mb = 1000000; // We initialize max one thread per Mb
@@ -628,7 +618,7 @@ idx_t LineInfo::GetLine(idx_t batch_idx, idx_t line_error, idx_t file_idx, idx_t
628
618
 
629
619
  static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext &context,
630
620
  TableFunctionInitInput &input) {
631
- auto &bind_data = (ReadCSVData &)*input.bind_data;
621
+ auto &bind_data = input.bind_data->CastNoConst<ReadCSVData>();
632
622
  if (bind_data.files.empty()) {
633
623
  // This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
634
624
  return make_uniq<ParallelCSVGlobalState>();
@@ -636,7 +626,15 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext
636
626
  unique_ptr<CSVFileHandle> file_handle;
637
627
 
638
628
  bind_data.options.file_path = bind_data.files[0];
639
- file_handle = ReadCSV::OpenCSV(bind_data.options.file_path, bind_data.options.compression, context);
629
+
630
+ if (bind_data.initial_reader) {
631
+ file_handle = std::move(bind_data.initial_reader->file_handle);
632
+ file_handle->Reset();
633
+ file_handle->DisableReset();
634
+ bind_data.initial_reader.reset();
635
+ } else {
636
+ file_handle = ReadCSV::OpenCSV(bind_data.options.file_path, bind_data.options.compression, context);
637
+ }
640
638
  return make_uniq<ParallelCSVGlobalState>(
641
639
  context, std::move(file_handle), bind_data.files, context.db->NumberOfThreads(), bind_data.options.buffer_size,
642
640
  bind_data.options.skip_rows, ClientConfig::GetConfig(context).verify_parallelism, input.column_ids,
@@ -738,7 +736,7 @@ struct SingleThreadedCSVState : public GlobalTableFunctionState {
738
736
  return total_files;
739
737
  }
740
738
 
741
- double GetProgress(ReadCSVData &bind_data) const {
739
+ double GetProgress(const ReadCSVData &bind_data) const {
742
740
  D_ASSERT(total_files == bind_data.files.size());
743
741
  D_ASSERT(progress_in_files <= total_files * 100);
744
742
  return (double(progress_in_files) / double(total_files));
@@ -746,6 +744,16 @@ struct SingleThreadedCSVState : public GlobalTableFunctionState {
746
744
 
747
745
  unique_ptr<BufferedCSVReader> GetCSVReader(ClientContext &context, ReadCSVData &bind_data, idx_t &file_index,
748
746
  idx_t &total_size) {
747
+ auto reader = GetCSVReaderInternal(context, bind_data, file_index, total_size);
748
+ if (reader) {
749
+ reader->file_handle->DisableReset();
750
+ }
751
+ return reader;
752
+ }
753
+
754
+ private:
755
+ unique_ptr<BufferedCSVReader> GetCSVReaderInternal(ClientContext &context, ReadCSVData &bind_data,
756
+ idx_t &file_index, idx_t &total_size) {
749
757
  BufferedCSVReaderOptions options;
750
758
  {
751
759
  lock_guard<mutex> l(csv_lock);
@@ -799,14 +807,14 @@ public:
799
807
 
800
808
  static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext &context,
801
809
  TableFunctionInitInput &input) {
802
- auto &bind_data = (ReadCSVData &)*input.bind_data;
810
+ auto &bind_data = input.bind_data->CastNoConst<ReadCSVData>();
803
811
  auto result = make_uniq<SingleThreadedCSVState>(bind_data.files.size());
804
812
  if (bind_data.files.empty()) {
805
813
  // This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
806
814
  return std::move(result);
807
815
  } else {
808
816
  bind_data.options.file_path = bind_data.files[0];
809
- if (bind_data.initial_reader && bind_data.is_pipe) {
817
+ if (bind_data.initial_reader) {
810
818
  // If this is a pipe and an initial reader already exists due to read_csv_auto
811
819
  // We must re-use it, since we can't restart the reader due for it being a pipe.
812
820
  result->initial_reader = std::move(bind_data.initial_reader);
@@ -904,7 +912,7 @@ static void SingleThreadedCSVFunction(ClientContext &context, TableFunctionInput
904
912
  // Read CSV Functions
905
913
  //===--------------------------------------------------------------------===//
906
914
  static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &context, TableFunctionInitInput &input) {
907
- auto &bind_data = (ReadCSVData &)*input.bind_data;
915
+ auto &bind_data = input.bind_data->Cast<ReadCSVData>();
908
916
  if (bind_data.single_threaded) {
909
917
  return SingleThreadedCSVInit(context, input);
910
918
  } else {
@@ -914,7 +922,7 @@ static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &con
914
922
 
915
923
  unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
916
924
  GlobalTableFunctionState *global_state_p) {
917
- auto &csv_data = (ReadCSVData &)*input.bind_data;
925
+ auto &csv_data = input.bind_data->Cast<ReadCSVData>();
918
926
  if (csv_data.single_threaded) {
919
927
  return SingleThreadedReadCSVInitLocal(context, input, global_state_p);
920
928
  } else {
@@ -923,7 +931,7 @@ unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context,
923
931
  }
924
932
 
925
933
  static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
926
- auto &bind_data = (ReadCSVData &)*data_p.bind_data;
934
+ auto &bind_data = data_p.bind_data->Cast<ReadCSVData>();
927
935
  if (bind_data.single_threaded) {
928
936
  SingleThreadedCSVFunction(context, data_p, output);
929
937
  } else {
@@ -933,7 +941,7 @@ static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p,
933
941
 
934
942
  static idx_t CSVReaderGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
935
943
  LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
936
- auto &bind_data = (ReadCSVData &)*bind_data_p;
944
+ auto &bind_data = bind_data_p->Cast<ReadCSVData>();
937
945
  if (bind_data.single_threaded) {
938
946
  auto &data = local_state->Cast<SingleThreadedCSVLocalState>();
939
947
  return data.file_index;
@@ -980,28 +988,28 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
980
988
 
981
989
  double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
982
990
  const GlobalTableFunctionState *global_state) {
983
- auto &bind_data = (ReadCSVData &)*bind_data_p;
991
+ auto &bind_data = bind_data_p->Cast<ReadCSVData>();
984
992
  if (bind_data.single_threaded) {
985
- auto &data = (SingleThreadedCSVState &)*global_state;
993
+ auto &data = global_state->Cast<SingleThreadedCSVState>();
986
994
  return data.GetProgress(bind_data);
987
995
  } else {
988
- auto &data = (const ParallelCSVGlobalState &)*global_state;
996
+ auto &data = global_state->Cast<ParallelCSVGlobalState>();
989
997
  return data.GetProgress(bind_data);
990
998
  }
991
999
  }
992
1000
 
993
1001
  void CSVComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p,
994
1002
  vector<unique_ptr<Expression>> &filters) {
995
- auto data = (ReadCSVData *)bind_data_p;
1003
+ auto &data = bind_data_p->Cast<ReadCSVData>();
996
1004
  auto reset_reader =
997
- MultiFileReader::ComplexFilterPushdown(context, data->files, data->options.file_options, get, filters);
1005
+ MultiFileReader::ComplexFilterPushdown(context, data.files, data.options.file_options, get, filters);
998
1006
  if (reset_reader) {
999
- MultiFileReader::PruneReaders(*data);
1007
+ MultiFileReader::PruneReaders(data);
1000
1008
  }
1001
1009
  }
1002
1010
 
1003
1011
  unique_ptr<NodeStatistics> CSVReaderCardinality(ClientContext &context, const FunctionData *bind_data_p) {
1004
- auto &bind_data = (ReadCSVData &)*bind_data_p;
1012
+ auto &bind_data = bind_data_p->Cast<ReadCSVData>();
1005
1013
  idx_t per_file_cardinality = 0;
1006
1014
  if (bind_data.initial_reader && bind_data.initial_reader->file_handle) {
1007
1015
  auto estimated_row_width = (bind_data.csv_types.size() * 5);
@@ -1086,7 +1094,7 @@ void BufferedCSVReaderOptions::Deserialize(FieldReader &reader) {
1086
1094
  }
1087
1095
 
1088
1096
  static void CSVReaderSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const TableFunction &function) {
1089
- auto &bind_data = (ReadCSVData &)*bind_data_p;
1097
+ auto &bind_data = bind_data_p->Cast<ReadCSVData>();
1090
1098
  writer.WriteList<string>(bind_data.files);
1091
1099
  writer.WriteRegularSerializableList<LogicalType>(bind_data.csv_types);
1092
1100
  writer.WriteList<string>(bind_data.csv_names);
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev3402"
2
+ #define DUCKDB_VERSION "0.7.2-dev3515"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "6f543cb464"
5
+ #define DUCKDB_SOURCE_ID "12638bd156"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -54,6 +54,9 @@ public:
54
54
  idx_t GetSize() const {
55
55
  return allocated_size;
56
56
  }
57
+ bool IsSet() {
58
+ return pointer;
59
+ }
57
60
  void Reset();
58
61
 
59
62
  private:
@@ -18,8 +18,8 @@ struct StreamData {
18
18
  // various buffers & pointers
19
19
  bool write = false;
20
20
  bool refresh = false;
21
- unique_ptr<data_t[]> in_buff;
22
- unique_ptr<data_t[]> out_buff;
21
+ unsafe_array_ptr<data_t> in_buff;
22
+ unsafe_array_ptr<data_t> out_buff;
23
23
  data_ptr_t out_buff_start = nullptr;
24
24
  data_ptr_t out_buff_end = nullptr;
25
25
  data_ptr_t in_buff_start = nullptr;
@@ -59,7 +59,7 @@ public:
59
59
  class CompressedFile : public FileHandle {
60
60
  public:
61
61
  DUCKDB_API CompressedFile(CompressedFileSystem &fs, unique_ptr<FileHandle> child_handle_p, const string &path);
62
- DUCKDB_API virtual ~CompressedFile() override;
62
+ DUCKDB_API ~CompressedFile() override;
63
63
 
64
64
  CompressedFileSystem &compressed_fs;
65
65
  unique_ptr<FileHandle> child_handle;
@@ -21,11 +21,6 @@ namespace duckdb {
21
21
  using std::move;
22
22
  #endif
23
23
 
24
- // template <class _Tp, class _Dp = std::default_delete<_Tp>>
25
- // class unique_ptr;
26
-
27
- // using data_ptr = unique_ptr<char[]>;
28
-
29
24
  // NOTE: there is a copy of this in the Postgres' parser grammar (gram.y)
30
25
  #define DEFAULT_SCHEMA "main"
31
26
  #define INVALID_SCHEMA ""
@@ -37,10 +37,10 @@ namespace duckdb {
37
37
  #define DUCKDB_EXPLICIT_FALLTHROUGH
38
38
  #endif
39
39
 
40
- template<class _Tp>
40
+ template<class _Tp, bool SAFE = true>
41
41
  struct __unique_if
42
42
  {
43
- typedef unique_ptr<_Tp> __unique_single;
43
+ typedef unique_ptr<_Tp, SAFE> __unique_single;
44
44
  };
45
45
 
46
46
  template<class _Tp>
@@ -57,19 +57,32 @@ struct __unique_if<_Tp[_Np]>
57
57
 
58
58
  template<class _Tp, class... _Args>
59
59
  inline
60
- typename __unique_if<_Tp>::__unique_single
60
+ typename __unique_if<_Tp, true>::__unique_single
61
61
  make_uniq(_Args&&... __args)
62
62
  {
63
- return unique_ptr<_Tp>(new _Tp(std::forward<_Args>(__args)...));
63
+ return unique_ptr<_Tp, true>(new _Tp(std::forward<_Args>(__args)...));
64
64
  }
65
65
 
66
- template<class _Tp>
66
+ template<class _Tp, class... _Args>
67
67
  inline
68
- typename __unique_if<_Tp>::__unique_array_unknown_bound
69
- make_uniq(size_t __n)
68
+ typename __unique_if<_Tp, false>::__unique_single
69
+ make_unsafe_uniq(_Args&&... __args)
70
+ {
71
+ return unique_ptr<_Tp, false>(new _Tp(std::forward<_Args>(__args)...));
72
+ }
73
+
74
+ template<class _Tp>
75
+ inline unique_ptr<_Tp[], true>
76
+ make_array(size_t __n)
77
+ {
78
+ return unique_ptr<_Tp[], true>(new _Tp[__n]());
79
+ }
80
+
81
+ template<class _Tp>
82
+ inline unique_ptr<_Tp[], false>
83
+ make_unsafe_array(size_t __n)
70
84
  {
71
- typedef typename std::remove_extent<_Tp>::type _Up;
72
- return unique_ptr<_Tp>(new _Up[__n]());
85
+ return unique_ptr<_Tp[], false>(new _Tp[__n]());
73
86
  }
74
87
 
75
88
  template<class _Tp, class... _Args>
@@ -0,0 +1,15 @@
1
+ #pragma once
2
+
3
+ namespace duckdb {
4
+
5
+ template <bool ENABLED>
6
+ struct MemorySafety {
7
+ #ifdef DEBUG
8
+ // In DEBUG mode safety is always on
9
+ static constexpr bool enabled = true;
10
+ #else
11
+ static constexpr bool enabled = ENABLED;
12
+ #endif
13
+ };
14
+
15
+ } // namespace duckdb
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/exception.hpp"
12
+ #include "duckdb/common/unique_ptr.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
@@ -18,7 +18,7 @@ public:
18
18
  FileLockType lock_type = FileLockType::READ_LOCK, optional_ptr<FileOpener> opener = nullptr);
19
19
 
20
20
  FileSystem &fs;
21
- unique_ptr<data_t[]> data;
21
+ unsafe_array_ptr<data_t> data;
22
22
  idx_t offset;
23
23
  idx_t read_data;
24
24
  unique_ptr<FileHandle> handle;
@@ -25,7 +25,7 @@ public:
25
25
 
26
26
  FileSystem &fs;
27
27
  string path;
28
- unique_ptr<data_t[]> data;
28
+ unsafe_array_ptr<data_t> data;
29
29
  idx_t offset;
30
30
  idx_t total_written;
31
31
  unique_ptr<FileHandle> handle;
@@ -9,13 +9,14 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/serializer.hpp"
12
+ #include "duckdb/common/unique_ptr.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
15
16
  #define SERIALIZER_DEFAULT_SIZE 1024
16
17
 
17
18
  struct BinaryData {
18
- unique_ptr<data_t[]> data;
19
+ unsafe_array_ptr<data_t> data;
19
20
  idx_t size;
20
21
  };
21
22
 
@@ -25,7 +26,7 @@ public:
25
26
  //! writing past the initial threshold
26
27
  DUCKDB_API explicit BufferedSerializer(idx_t maximum_size = SERIALIZER_DEFAULT_SIZE);
27
28
  //! Serializes to a provided (owned) data pointer
28
- BufferedSerializer(unique_ptr<data_t[]> data, idx_t size);
29
+ BufferedSerializer(unsafe_array_ptr<data_t> data, idx_t size);
29
30
  BufferedSerializer(data_ptr_t data, idx_t size);
30
31
 
31
32
  idx_t maximum_size;
@@ -65,10 +65,9 @@ struct is_unordered_map<typename std::unordered_map<Args...>> : std::true_type {
65
65
  template <typename T>
66
66
  struct is_unique_ptr : std::false_type {};
67
67
 
68
- template <typename T, typename D>
69
- struct is_unique_ptr<unique_ptr<T, D>> : std::true_type {
68
+ template <typename T>
69
+ struct is_unique_ptr<unique_ptr<T>> : std::true_type {
70
70
  typedef T ELEMENT_TYPE;
71
- typedef D DELETER_TYPE;
72
71
  };
73
72
 
74
73
  template <typename T>
@@ -25,6 +25,7 @@ applications, and to alter it and redistribute it freely, subject to the followi
25
25
  #include "duckdb/common/fast_mem.hpp"
26
26
  #include "duckdb/common/helper.hpp"
27
27
  #include "duckdb/common/types.hpp"
28
+ #include "duckdb/common/unique_ptr.hpp"
28
29
 
29
30
  #include <algorithm>
30
31
  #include <cstddef>
@@ -38,6 +39,10 @@ using duckdb::idx_t;
38
39
  using duckdb::data_t;
39
40
  using duckdb::data_ptr_t;
40
41
  using duckdb::unique_ptr;
42
+ using duckdb::array_ptr;
43
+ using duckdb::unsafe_array_ptr;
44
+ using duckdb::make_array;
45
+ using duckdb::make_unsafe_array;
41
46
  using duckdb::FastMemcpy;
42
47
  using duckdb::FastMemcmp;
43
48
 
@@ -73,9 +78,9 @@ inline int log2(T n) {
73
78
  struct PDQConstants {
74
79
  PDQConstants(idx_t entry_size, idx_t comp_offset, idx_t comp_size, data_ptr_t end)
75
80
  : entry_size(entry_size), comp_offset(comp_offset), comp_size(comp_size),
76
- tmp_buf_ptr(unique_ptr<data_t[]>(new data_t[entry_size])), tmp_buf(tmp_buf_ptr.get()),
77
- iter_swap_buf_ptr(unique_ptr<data_t[]>(new data_t[entry_size])), iter_swap_buf(iter_swap_buf_ptr.get()),
78
- swap_offsets_buf_ptr(unique_ptr<data_t[]>(new data_t[entry_size])),
81
+ tmp_buf_ptr(make_unsafe_array<data_t>(entry_size)), tmp_buf(tmp_buf_ptr.get()),
82
+ iter_swap_buf_ptr(make_unsafe_array<data_t>(entry_size)), iter_swap_buf(iter_swap_buf_ptr.get()),
83
+ swap_offsets_buf_ptr(make_unsafe_array<data_t>(entry_size)),
79
84
  swap_offsets_buf(swap_offsets_buf_ptr.get()), end(end) {
80
85
  }
81
86
 
@@ -83,13 +88,13 @@ struct PDQConstants {
83
88
  const idx_t comp_offset;
84
89
  const idx_t comp_size;
85
90
 
86
- unique_ptr<data_t[]> tmp_buf_ptr;
91
+ unsafe_array_ptr<data_t> tmp_buf_ptr;
87
92
  const data_ptr_t tmp_buf;
88
93
 
89
- unique_ptr<data_t[]> iter_swap_buf_ptr;
94
+ unsafe_array_ptr<data_t> iter_swap_buf_ptr;
90
95
  const data_ptr_t iter_swap_buf;
91
96
 
92
- unique_ptr<data_t[]> swap_offsets_buf_ptr;
97
+ unsafe_array_ptr<data_t> swap_offsets_buf_ptr;
93
98
  const data_ptr_t swap_offsets_buf;
94
99
 
95
100
  const data_ptr_t end;
@@ -124,7 +124,8 @@ public:
124
124
  //! Turn all the vectors from the chunk into flat vectors
125
125
  DUCKDB_API void Flatten();
126
126
 
127
- DUCKDB_API unique_ptr<UnifiedVectorFormat[]> ToUnifiedFormat();
127
+ // FIXME: this is DUCKDB_API, might need conversion back to regular unique ptr?
128
+ DUCKDB_API unsafe_array_ptr<UnifiedVectorFormat> ToUnifiedFormat();
128
129
 
129
130
  DUCKDB_API void Slice(const SelectionVector &sel_vector, idx_t count);
130
131
 
@@ -18,7 +18,7 @@ class VectorBuffer;
18
18
  struct SelectionData {
19
19
  DUCKDB_API explicit SelectionData(idx_t count);
20
20
 
21
- unique_ptr<sel_t[]> owned_data;
21
+ unsafe_array_ptr<sel_t> owned_data;
22
22
  };
23
23
 
24
24
  struct SelectionVector {
@@ -24,7 +24,7 @@ struct TemplatedValidityData {
24
24
  public:
25
25
  inline explicit TemplatedValidityData(idx_t count) {
26
26
  auto entry_count = EntryCount(count);
27
- owned_data = unique_ptr<V[]>(new V[entry_count]);
27
+ owned_data = make_unsafe_array<V>(entry_count);
28
28
  for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) {
29
29
  owned_data[entry_idx] = MAX_ENTRY;
30
30
  }
@@ -32,13 +32,13 @@ public:
32
32
  inline TemplatedValidityData(const V *validity_mask, idx_t count) {
33
33
  D_ASSERT(validity_mask);
34
34
  auto entry_count = EntryCount(count);
35
- owned_data = unique_ptr<V[]>(new V[entry_count]);
35
+ owned_data = make_unsafe_array<V>(entry_count);
36
36
  for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) {
37
37
  owned_data[entry_idx] = validity_mask[entry_idx];
38
38
  }
39
39
  }
40
40
 
41
- unique_ptr<V[]> owned_data;
41
+ unsafe_array_ptr<V> owned_data;
42
42
 
43
43
  public:
44
44
  static inline idx_t EntryCount(idx_t count) {
@@ -71,10 +71,10 @@ public:
71
71
  }
72
72
  explicit VectorBuffer(idx_t data_size) : buffer_type(VectorBufferType::STANDARD_BUFFER) {
73
73
  if (data_size > 0) {
74
- data = unique_ptr<data_t[]>(new data_t[data_size]);
74
+ data = make_unsafe_array<data_t>(data_size);
75
75
  }
76
76
  }
77
- explicit VectorBuffer(unique_ptr<data_t[]> data_p)
77
+ explicit VectorBuffer(unsafe_array_ptr<data_t> data_p)
78
78
  : buffer_type(VectorBufferType::STANDARD_BUFFER), data(std::move(data_p)) {
79
79
  }
80
80
  virtual ~VectorBuffer() {
@@ -87,7 +87,7 @@ public:
87
87
  return data.get();
88
88
  }
89
89
 
90
- void SetData(unique_ptr<data_t[]> new_data) {
90
+ void SetData(unsafe_array_ptr<data_t> new_data) {
91
91
  data = std::move(new_data);
92
92
  }
93
93
 
@@ -120,7 +120,7 @@ public:
120
120
  protected:
121
121
  VectorBufferType buffer_type;
122
122
  unique_ptr<VectorAuxiliaryData> aux_data;
123
- unique_ptr<data_t[]> data;
123
+ unsafe_array_ptr<data_t> data;
124
124
  };
125
125
 
126
126
  //! The DictionaryBuffer holds a selection vector
@@ -2,38 +2,45 @@
2
2
 
3
3
  #include "duckdb/common/exception.hpp"
4
4
  #include "duckdb/common/likely.hpp"
5
+ #include "duckdb/common/memory_safety.hpp"
5
6
 
6
7
  #include <memory>
7
8
  #include <type_traits>
8
9
 
9
10
  namespace duckdb {
10
11
 
11
- namespace {
12
- struct __unique_ptr_utils {
13
- static inline void AssertNotNull(void *ptr) {
14
- #ifdef DEBUG
15
- if (DUCKDB_UNLIKELY(!ptr)) {
16
- throw InternalException("Attempted to dereference unique_ptr that is NULL!");
12
+ template <class _Tp, bool SAFE = true>
13
+ class unique_ptr : public std::unique_ptr<_Tp, std::default_delete<_Tp>> {
14
+ public:
15
+ using original = std::unique_ptr<_Tp, std::default_delete<_Tp>>;
16
+ using original::original;
17
+
18
+ private:
19
+ static inline void AssertNotNull(const bool null) {
20
+ #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
21
+ return;
22
+ #else
23
+ if (DUCKDB_UNLIKELY(null)) {
24
+ throw duckdb::InternalException("Attempted to dereference unique_ptr that is NULL!");
17
25
  }
18
26
  #endif
19
27
  }
20
- };
21
- } // namespace
22
28
 
23
- template <class _Tp, class _Dp = std::default_delete<_Tp>>
24
- class unique_ptr : public std::unique_ptr<_Tp, _Dp> {
25
29
  public:
26
- using original = std::unique_ptr<_Tp, _Dp>;
27
- using original::original;
28
-
29
30
  typename std::add_lvalue_reference<_Tp>::type operator*() const {
30
- __unique_ptr_utils::AssertNotNull((void *)original::get());
31
- return *(original::get());
31
+ const auto ptr = original::get();
32
+ if (MemorySafety<SAFE>::enabled) {
33
+ AssertNotNull(!ptr);
34
+ }
35
+ return *ptr;
32
36
  }
33
37
 
34
38
  typename original::pointer operator->() const {
35
- __unique_ptr_utils::AssertNotNull((void *)original::get());
36
- return original::get();
39
+ const auto ptr = original::get();
40
+ if (MemorySafety<SAFE>::enabled) {
41
+ AssertNotNull(!ptr);
42
+ }
43
+ return ptr;
37
44
  }
38
45
 
39
46
  #ifdef DUCKDB_CLANG_TIDY
@@ -46,16 +53,40 @@ public:
46
53
  }
47
54
  };
48
55
 
49
- template <class _Tp, class _Dp>
50
- class unique_ptr<_Tp[], _Dp> : public std::unique_ptr<_Tp[], _Dp> {
56
+ template <class _Tp, bool SAFE>
57
+ class unique_ptr<_Tp[], SAFE> : public std::unique_ptr<_Tp[], std::default_delete<_Tp[]>> {
51
58
  public:
52
- using original = std::unique_ptr<_Tp[], _Dp>;
59
+ using original = std::unique_ptr<_Tp[], std::default_delete<_Tp[]>>;
53
60
  using original::original;
54
61
 
62
+ private:
63
+ static inline void AssertNotNull(const bool null) {
64
+ #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
65
+ return;
66
+ #else
67
+ if (DUCKDB_UNLIKELY(null)) {
68
+ throw duckdb::InternalException("Attempted to dereference unique_ptr that is NULL!");
69
+ }
70
+ #endif
71
+ }
72
+
73
+ public:
55
74
  typename std::add_lvalue_reference<_Tp>::type operator[](size_t __i) const {
56
- __unique_ptr_utils::AssertNotNull((void *)original::get());
57
- return (original::get())[__i];
75
+ const auto ptr = original::get();
76
+ if (MemorySafety<SAFE>::enabled) {
77
+ AssertNotNull(!ptr);
78
+ }
79
+ return ptr[__i];
58
80
  }
59
81
  };
60
82
 
83
+ template <typename T>
84
+ using array_ptr = unique_ptr<T[], true>;
85
+
86
+ template <typename T>
87
+ using unsafe_array_ptr = unique_ptr<T[], false>;
88
+
89
+ template <typename T>
90
+ using unsafe_unique_ptr = unique_ptr<T, false>;
91
+
61
92
  } // namespace duckdb