duckdb 0.7.1-dev37.0 → 0.7.1-dev415.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. package/README.md +1 -1
  2. package/binding.gyp +7 -7
  3. package/package.json +3 -3
  4. package/src/duckdb/extension/json/buffered_json_reader.cpp +50 -9
  5. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +7 -2
  6. package/src/duckdb/extension/json/include/json_scan.hpp +45 -10
  7. package/src/duckdb/extension/json/json_functions/copy_json.cpp +35 -22
  8. package/src/duckdb/extension/json/json_functions/json_create.cpp +8 -8
  9. package/src/duckdb/extension/json/json_functions/json_structure.cpp +8 -3
  10. package/src/duckdb/extension/json/json_functions/json_transform.cpp +54 -10
  11. package/src/duckdb/extension/json/json_functions/read_json.cpp +104 -49
  12. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +5 -3
  13. package/src/duckdb/extension/json/json_functions.cpp +7 -0
  14. package/src/duckdb/extension/json/json_scan.cpp +144 -37
  15. package/src/duckdb/extension/parquet/column_reader.cpp +7 -0
  16. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -0
  17. package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -9
  18. package/src/duckdb/src/catalog/catalog.cpp +62 -13
  19. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +8 -7
  20. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -1
  21. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  22. package/src/duckdb/src/catalog/default/default_views.cpp +1 -1
  23. package/src/duckdb/src/common/bind_helpers.cpp +55 -0
  24. package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
  25. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  26. package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
  27. package/src/duckdb/src/common/file_system.cpp +28 -0
  28. package/src/duckdb/src/common/hive_partitioning.cpp +1 -0
  29. package/src/duckdb/src/common/local_file_system.cpp +4 -4
  30. package/src/duckdb/src/common/operator/cast_operators.cpp +10 -4
  31. package/src/duckdb/src/common/string_util.cpp +8 -4
  32. package/src/duckdb/src/common/types/partitioned_column_data.cpp +1 -0
  33. package/src/duckdb/src/common/types/time.cpp +1 -1
  34. package/src/duckdb/src/common/types/timestamp.cpp +35 -4
  35. package/src/duckdb/src/common/types.cpp +37 -11
  36. package/src/duckdb/src/execution/column_binding_resolver.cpp +5 -2
  37. package/src/duckdb/src/execution/index/art/art.cpp +117 -67
  38. package/src/duckdb/src/execution/index/art/art_key.cpp +24 -12
  39. package/src/duckdb/src/execution/index/art/leaf.cpp +7 -8
  40. package/src/duckdb/src/execution/index/art/node.cpp +13 -27
  41. package/src/duckdb/src/execution/index/art/node16.cpp +5 -8
  42. package/src/duckdb/src/execution/index/art/node256.cpp +3 -5
  43. package/src/duckdb/src/execution/index/art/node4.cpp +4 -7
  44. package/src/duckdb/src/execution/index/art/node48.cpp +5 -8
  45. package/src/duckdb/src/execution/index/art/prefix.cpp +2 -3
  46. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +6 -27
  47. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +1 -9
  48. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -9
  49. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -9
  50. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +6 -11
  51. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +13 -13
  52. package/src/duckdb/src/execution/operator/schema/physical_detach.cpp +37 -0
  53. package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +0 -5
  54. package/src/duckdb/src/execution/physical_operator.cpp +6 -6
  55. package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +4 -0
  56. package/src/duckdb/src/execution/physical_plan_generator.cpp +1 -0
  57. package/src/duckdb/src/function/pragma/pragma_queries.cpp +38 -11
  58. package/src/duckdb/src/function/scalar/generic/current_setting.cpp +2 -2
  59. package/src/duckdb/src/function/scalar/map/map.cpp +69 -21
  60. package/src/duckdb/src/function/table/read_csv.cpp +17 -5
  61. package/src/duckdb/src/function/table/system/duckdb_temporary_files.cpp +59 -0
  62. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  63. package/src/duckdb/src/function/table/table_scan.cpp +3 -0
  64. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  65. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +7 -1
  66. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  67. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/common/bind_helpers.hpp +2 -0
  69. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
  70. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  71. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +3 -2
  72. package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +3 -0
  73. package/src/duckdb/src/include/duckdb/common/exception.hpp +10 -0
  74. package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -0
  75. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +9 -1
  76. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +4 -4
  77. package/src/duckdb/src/include/duckdb/common/string_util.hpp +9 -2
  78. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +5 -1
  79. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +37 -41
  80. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +8 -11
  81. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +1 -3
  82. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -2
  83. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  84. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_detach.hpp +32 -0
  85. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  86. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -3
  88. package/src/duckdb/src/include/duckdb/main/{extension_functions.hpp → extension_entries.hpp} +26 -5
  89. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +3 -0
  90. package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
  91. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +0 -7
  92. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_database_info.hpp +0 -4
  93. package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +32 -0
  94. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +1 -1
  95. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -2
  96. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/parser/statement/detach_statement.hpp +29 -0
  98. package/src/duckdb/src/include/duckdb/parser/statement/list.hpp +1 -0
  99. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +3 -3
  100. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +1 -1
  101. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  102. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
  103. package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -0
  104. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +10 -3
  105. package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -5
  106. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +1 -2
  107. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +8 -0
  108. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +7 -1
  109. package/src/duckdb/src/include/duckdb/storage/index.hpp +47 -38
  110. package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +7 -0
  111. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -0
  112. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +7 -0
  113. package/src/duckdb/src/main/client_context.cpp +2 -0
  114. package/src/duckdb/src/main/config.cpp +1 -0
  115. package/src/duckdb/src/main/database.cpp +14 -5
  116. package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
  117. package/src/duckdb/src/main/extension/extension_install.cpp +43 -9
  118. package/src/duckdb/src/main/extension/extension_load.cpp +29 -5
  119. package/src/duckdb/src/main/settings/settings.cpp +16 -0
  120. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +2 -6
  121. package/src/duckdb/src/parallel/pipeline_executor.cpp +1 -55
  122. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +3 -0
  123. package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -13
  124. package/src/duckdb/src/parser/statement/delete_statement.cpp +3 -0
  125. package/src/duckdb/src/parser/statement/detach_statement.cpp +15 -0
  126. package/src/duckdb/src/parser/statement/insert_statement.cpp +9 -0
  127. package/src/duckdb/src/parser/statement/update_statement.cpp +3 -0
  128. package/src/duckdb/src/parser/transform/expression/transform_case.cpp +3 -3
  129. package/src/duckdb/src/parser/transform/statement/transform_create_database.cpp +0 -1
  130. package/src/duckdb/src/parser/transform/statement/transform_detach.cpp +19 -0
  131. package/src/duckdb/src/parser/transformer.cpp +2 -0
  132. package/src/duckdb/src/planner/bind_context.cpp +1 -1
  133. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +3 -0
  134. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +7 -14
  135. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +16 -14
  136. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +13 -0
  137. package/src/duckdb/src/planner/binder/statement/bind_detach.cpp +19 -0
  138. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +29 -4
  139. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +22 -1
  140. package/src/duckdb/src/planner/binder.cpp +2 -0
  141. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +32 -1
  142. package/src/duckdb/src/planner/logical_operator.cpp +6 -1
  143. package/src/duckdb/src/planner/planner.cpp +1 -0
  144. package/src/duckdb/src/storage/buffer_manager.cpp +105 -26
  145. package/src/duckdb/src/storage/compression/bitpacking.cpp +16 -7
  146. package/src/duckdb/src/storage/data_table.cpp +66 -3
  147. package/src/duckdb/src/storage/index.cpp +1 -1
  148. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  149. package/src/duckdb/src/storage/table/column_data.cpp +4 -2
  150. package/src/duckdb/src/storage/table/update_segment.cpp +15 -0
  151. package/src/duckdb/src/storage/table_index_list.cpp +1 -2
  152. package/src/duckdb/src/storage/wal_replay.cpp +68 -0
  153. package/src/duckdb/src/storage/write_ahead_log.cpp +21 -1
  154. package/src/duckdb/src/transaction/commit_state.cpp +5 -2
  155. package/src/duckdb/third_party/concurrentqueue/blockingconcurrentqueue.h +2 -2
  156. package/src/duckdb/third_party/fmt/include/fmt/core.h +1 -2
  157. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +1 -0
  158. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +14 -0
  159. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +530 -1006
  160. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17659 -17626
  161. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  162. package/src/duckdb/ub_src_execution_operator_schema.cpp +2 -0
  163. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  164. package/src/duckdb/ub_src_parser_statement.cpp +2 -0
  165. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
  166. package/src/duckdb/ub_src_planner_binder_statement.cpp +2 -0
  167. package/src/statement.cpp +46 -12
  168. package/test/arrow.test.ts +3 -3
  169. package/test/prepare.test.ts +39 -1
  170. package/test/typescript_decls.test.ts +1 -1
  171. package/src/duckdb/src/include/duckdb/function/create_database_extension.hpp +0 -37
@@ -62,7 +62,7 @@ BlockHandle::BlockHandle(BlockManager &block_manager, block_id_t block_id_p, uni
62
62
  memory_charge = std::move(reservation);
63
63
  }
64
64
 
65
- BlockHandle::~BlockHandle() {
65
+ BlockHandle::~BlockHandle() { // NOLINT: allow internal exceptions
66
66
  // being destroyed, so any unswizzled pointers are just binary junk now.
67
67
  unswizzled = nullptr;
68
68
  auto &buffer_manager = block_manager.buffer_manager;
@@ -230,6 +230,7 @@ public:
230
230
  private:
231
231
  DatabaseInstance &db;
232
232
  string temp_directory;
233
+ bool created_directory = false;
233
234
  unique_ptr<TemporaryFileManager> temp_file;
234
235
  };
235
236
 
@@ -521,11 +522,8 @@ void BufferManager::PurgeQueue() {
521
522
 
522
523
  void BlockManager::UnregisterBlock(block_id_t block_id, bool can_destroy) {
523
524
  if (block_id >= MAXIMUM_BLOCK) {
524
- // in-memory buffer: destroy the buffer
525
- if (!can_destroy) {
526
- // buffer could have been offloaded to disk: remove the file
527
- buffer_manager.DeleteTemporaryFile(block_id);
528
- }
525
+ // in-memory buffer: buffer could have been offloaded to disk: remove the file
526
+ buffer_manager.DeleteTemporaryFile(block_id);
529
527
  } else {
530
528
  lock_guard<mutex> lock(blocks_lock);
531
529
  // on-disk block: erase from list of blocks in manager
@@ -607,7 +605,11 @@ public:
607
605
  //! Returns true if the max_index has been altered
608
606
  bool RemoveIndex(idx_t index) {
609
607
  // remove this block from the set of blocks
610
- indexes_in_use.erase(index);
608
+ auto entry = indexes_in_use.find(index);
609
+ if (entry == indexes_in_use.end()) {
610
+ throw InternalException("RemoveIndex - index %llu not found in indexes_in_use", index);
611
+ }
612
+ indexes_in_use.erase(entry);
611
613
  free_indexes.insert(index);
612
614
  // check if we can truncate the file
613
615
 
@@ -616,7 +618,7 @@ public:
616
618
  if (max_index_in_use < max_index) {
617
619
  // max index in use is lower than the max_index
618
620
  // reduce the max_index
619
- max_index = max_index_in_use + 1;
621
+ max_index = indexes_in_use.empty() ? 0 : max_index_in_use + 1;
620
622
  // we can remove any free_indexes that are larger than the current max_index
621
623
  while (!free_indexes.empty()) {
622
624
  auto max_entry = *free_indexes.rbegin();
@@ -692,16 +694,15 @@ public:
692
694
 
693
695
  unique_ptr<FileBuffer> ReadTemporaryBuffer(block_id_t id, idx_t block_index,
694
696
  unique_ptr<FileBuffer> reusable_buffer) {
695
- auto buffer =
696
- ReadTemporaryBufferInternal(BufferManager::GetBufferManager(db), *handle, GetPositionInFile(block_index),
697
- Storage::BLOCK_SIZE, id, std::move(reusable_buffer));
698
- {
699
- // remove the block (and potentially truncate the temp file)
700
- TemporaryFileLock lock(file_lock);
701
- D_ASSERT(handle);
702
- RemoveTempBlockIndex(lock, block_index);
703
- }
704
- return buffer;
697
+ return ReadTemporaryBufferInternal(BufferManager::GetBufferManager(db), *handle, GetPositionInFile(block_index),
698
+ Storage::BLOCK_SIZE, id, std::move(reusable_buffer));
699
+ }
700
+
701
+ void EraseBlockIndex(block_id_t block_index) {
702
+ // remove the block (and potentially truncate the temp file)
703
+ TemporaryFileLock lock(file_lock);
704
+ D_ASSERT(handle);
705
+ RemoveTempBlockIndex(lock, block_index);
705
706
  }
706
707
 
707
708
  bool DeleteIfEmpty() {
@@ -717,6 +718,14 @@ public:
717
718
  return true;
718
719
  }
719
720
 
721
+ TemporaryFileInformation GetTemporaryFile() {
722
+ TemporaryFileLock lock(file_lock);
723
+ TemporaryFileInformation info;
724
+ info.path = path;
725
+ info.size = GetPositionInFile(index_manager.GetMaxIndex());
726
+ return info;
727
+ }
728
+
720
729
  private:
721
730
  void CreateFileIfNotExists(TemporaryFileLock &) {
722
731
  if (handle) {
@@ -817,7 +826,7 @@ public:
817
826
  {
818
827
  // remove the block (and potentially erase the temp file)
819
828
  TemporaryManagerLock lock(manager_lock);
820
- EraseUsedBlock(lock, id, handle, index.file_index);
829
+ EraseUsedBlock(lock, id, handle, index);
821
830
  }
822
831
  return buffer;
823
832
  }
@@ -826,14 +835,29 @@ public:
826
835
  TemporaryManagerLock lock(manager_lock);
827
836
  auto index = GetTempBlockIndex(lock, id);
828
837
  auto handle = GetFileHandle(lock, index.file_index);
829
- EraseUsedBlock(lock, id, handle, index.file_index);
838
+ EraseUsedBlock(lock, id, handle, index);
839
+ }
840
+
841
+ vector<TemporaryFileInformation> GetTemporaryFiles() {
842
+ lock_guard<mutex> lock(manager_lock);
843
+ vector<TemporaryFileInformation> result;
844
+ for (auto &file : files) {
845
+ result.push_back(file.second->GetTemporaryFile());
846
+ }
847
+ return result;
830
848
  }
831
849
 
832
850
  private:
833
- void EraseUsedBlock(TemporaryManagerLock &lock, block_id_t id, TemporaryFileHandle *handle, idx_t file_index) {
834
- used_blocks.erase(id);
851
+ void EraseUsedBlock(TemporaryManagerLock &lock, block_id_t id, TemporaryFileHandle *handle,
852
+ TemporaryFileIndex index) {
853
+ auto entry = used_blocks.find(id);
854
+ if (entry == used_blocks.end()) {
855
+ throw InternalException("EraseUsedBlock - Block %llu not found in used blocks", id);
856
+ }
857
+ used_blocks.erase(entry);
858
+ handle->EraseBlockIndex(index.block_index);
835
859
  if (handle->DeleteIfEmpty()) {
836
- EraseFileHandle(lock, file_index);
860
+ EraseFileHandle(lock, index.file_index);
837
861
  }
838
862
  }
839
863
 
@@ -868,7 +892,10 @@ TemporaryDirectoryHandle::TemporaryDirectoryHandle(DatabaseInstance &db, string
868
892
  : db(db), temp_directory(std::move(path_p)), temp_file(make_unique<TemporaryFileManager>(db, temp_directory)) {
869
893
  auto &fs = FileSystem::GetFileSystem(db);
870
894
  if (!temp_directory.empty()) {
871
- fs.CreateDirectory(temp_directory);
895
+ if (!fs.DirectoryExists(temp_directory)) {
896
+ fs.CreateDirectory(temp_directory);
897
+ created_directory = true;
898
+ }
872
899
  }
873
900
  }
874
901
  TemporaryDirectoryHandle::~TemporaryDirectoryHandle() {
@@ -877,7 +904,30 @@ TemporaryDirectoryHandle::~TemporaryDirectoryHandle() {
877
904
  // then delete the temporary file directory
878
905
  auto &fs = FileSystem::GetFileSystem(db);
879
906
  if (!temp_directory.empty()) {
880
- fs.RemoveDirectory(temp_directory);
907
+ bool delete_directory = created_directory;
908
+ vector<string> files_to_delete;
909
+ if (!created_directory) {
910
+ bool deleted_everything = true;
911
+ fs.ListFiles(temp_directory, [&](const string &path, bool isdir) {
912
+ if (isdir) {
913
+ deleted_everything = false;
914
+ return;
915
+ }
916
+ if (!StringUtil::StartsWith(path, "duckdb_temp_")) {
917
+ deleted_everything = false;
918
+ return;
919
+ }
920
+ files_to_delete.push_back(path);
921
+ });
922
+ }
923
+ if (delete_directory) {
924
+ // we want to remove all files in the directory
925
+ fs.RemoveDirectory(temp_directory);
926
+ } else {
927
+ for (auto &file : files_to_delete) {
928
+ fs.RemoveFile(fs.JoinPath(temp_directory, file));
929
+ }
930
+ }
881
931
  }
882
932
  }
883
933
 
@@ -887,7 +937,7 @@ TemporaryFileManager &TemporaryDirectoryHandle::GetTempFile() {
887
937
 
888
938
  string BufferManager::GetTemporaryPath(block_id_t id) {
889
939
  auto &fs = FileSystem::GetFileSystem(db);
890
- return fs.JoinPath(temp_directory, to_string(id) + ".block");
940
+ return fs.JoinPath(temp_directory, "duckdb_temp_block-" + to_string(id) + ".block");
891
941
  }
892
942
 
893
943
  void BufferManager::RequireTemporaryDirectory() {
@@ -965,6 +1015,35 @@ void BufferManager::DeleteTemporaryFile(block_id_t id) {
965
1015
  }
966
1016
  }
967
1017
 
1018
+ vector<TemporaryFileInformation> BufferManager::GetTemporaryFiles() {
1019
+ vector<TemporaryFileInformation> result;
1020
+ if (temp_directory.empty()) {
1021
+ return result;
1022
+ }
1023
+ {
1024
+ lock_guard<mutex> temp_handle_guard(temp_handle_lock);
1025
+ if (temp_directory_handle) {
1026
+ result = temp_directory_handle->GetTempFile().GetTemporaryFiles();
1027
+ }
1028
+ }
1029
+ auto &fs = FileSystem::GetFileSystem(db);
1030
+ fs.ListFiles(temp_directory, [&](const string &name, bool is_dir) {
1031
+ if (is_dir) {
1032
+ return;
1033
+ }
1034
+ if (!StringUtil::EndsWith(name, ".block")) {
1035
+ return;
1036
+ }
1037
+ TemporaryFileInformation info;
1038
+ info.path = name;
1039
+ auto handle = fs.OpenFile(name, FileFlags::FILE_FLAGS_READ);
1040
+ info.size = fs.GetFileSize(*handle);
1041
+ handle.reset();
1042
+ result.push_back(info);
1043
+ });
1044
+ return result;
1045
+ }
1046
+
968
1047
  string BufferManager::InMemoryWarning() {
969
1048
  if (!temp_directory.empty()) {
970
1049
  return "";
@@ -447,8 +447,8 @@ public:
447
447
 
448
448
  static void ReserveSpace(BitpackingCompressState<T> *state, idx_t data_bytes) {
449
449
  idx_t meta_bytes = sizeof(bitpacking_metadata_encoded_t);
450
- state->FlushAndCreateSegmentIfFull(data_bytes + meta_bytes);
451
- D_ASSERT(data_bytes + meta_bytes <= state->RemainingSize());
450
+ state->FlushAndCreateSegmentIfFull(data_bytes, meta_bytes);
451
+ D_ASSERT(state->CanStore(data_bytes, meta_bytes));
452
452
  }
453
453
 
454
454
  static void UpdateStats(BitpackingCompressState<T> *state, idx_t count) {
@@ -461,9 +461,12 @@ public:
461
461
  }
462
462
  };
463
463
 
464
- // Space remaining between the metadata_ptr growing down and data ptr growing up
465
- idx_t RemainingSize() {
466
- return metadata_ptr - data_ptr;
464
+ bool CanStore(idx_t data_bytes, idx_t meta_bytes) {
465
+ auto required_data_bytes = AlignValue<idx_t>((data_ptr + data_bytes) - data_ptr);
466
+ auto required_meta_bytes = Storage::BLOCK_SIZE - (metadata_ptr - data_ptr) + meta_bytes;
467
+
468
+ return required_data_bytes + required_meta_bytes <=
469
+ Storage::BLOCK_SIZE - BitpackingPrimitives::BITPACKING_HEADER_SIZE;
467
470
  }
468
471
 
469
472
  void CreateEmptySegment(idx_t row_start) {
@@ -489,8 +492,8 @@ public:
489
492
  }
490
493
  }
491
494
 
492
- void FlushAndCreateSegmentIfFull(idx_t required_space) {
493
- if (RemainingSize() < required_space) {
495
+ void FlushAndCreateSegmentIfFull(idx_t required_data_bytes, idx_t required_meta_bytes) {
496
+ if (!CanStore(required_data_bytes, required_meta_bytes)) {
494
497
  auto row_start = current_segment->start + current_segment->count;
495
498
  FlushSegment();
496
499
  CreateEmptySegment(row_start);
@@ -505,6 +508,12 @@ public:
505
508
  idx_t metadata_offset = AlignValue(data_ptr - base_ptr);
506
509
  idx_t metadata_size = base_ptr + Storage::BLOCK_SIZE - metadata_ptr;
507
510
  idx_t total_segment_size = metadata_offset + metadata_size;
511
+
512
+ // Asserting things are still sane here
513
+ if (!CanStore(0, 0)) {
514
+ throw InternalException("Error in bitpacking size calculation");
515
+ }
516
+
508
517
  memmove(base_ptr + metadata_offset, metadata_ptr, metadata_size);
509
518
 
510
519
  // Store the offset of the metadata of the first group (which is at the highest address).
@@ -1144,15 +1144,78 @@ void DataTable::UpdateColumn(TableCatalogEntry &table, ClientContext &context, V
1144
1144
  }
1145
1145
 
1146
1146
  //===--------------------------------------------------------------------===//
1147
- // Create Index Scan
1147
+ // Index Scan
1148
1148
  //===--------------------------------------------------------------------===//
1149
- void DataTable::InitializeCreateIndexScan(CreateIndexScanState &state, const vector<column_t> &column_ids) {
1149
+ void DataTable::InitializeWALCreateIndexScan(CreateIndexScanState &state, const vector<column_t> &column_ids) {
1150
1150
  // we grab the append lock to make sure nothing is appended until AFTER we finish the index scan
1151
1151
  state.append_lock = std::unique_lock<mutex>(append_lock);
1152
- row_groups->InitializeCreateIndexScan(state);
1153
1152
  InitializeScan(state, column_ids);
1154
1153
  }
1155
1154
 
1155
+ void DataTable::WALAddIndex(ClientContext &context, unique_ptr<Index> index,
1156
+ const vector<unique_ptr<Expression>> &expressions) {
1157
+
1158
+ // if the data table is empty
1159
+ if (row_groups->IsEmpty()) {
1160
+ info->indexes.AddIndex(std::move(index));
1161
+ return;
1162
+ }
1163
+
1164
+ auto &allocator = Allocator::Get(db);
1165
+
1166
+ DataChunk result;
1167
+ result.Initialize(allocator, index->logical_types);
1168
+
1169
+ DataChunk intermediate;
1170
+ vector<LogicalType> intermediate_types;
1171
+ auto column_ids = index->column_ids;
1172
+ column_ids.push_back(COLUMN_IDENTIFIER_ROW_ID);
1173
+ for (auto &id : index->column_ids) {
1174
+ auto &col = column_definitions[id];
1175
+ intermediate_types.push_back(col.Type());
1176
+ }
1177
+ intermediate_types.emplace_back(LogicalType::ROW_TYPE);
1178
+ intermediate.Initialize(allocator, intermediate_types);
1179
+
1180
+ // initialize an index scan
1181
+ CreateIndexScanState state;
1182
+ InitializeWALCreateIndexScan(state, column_ids);
1183
+
1184
+ if (!is_root) {
1185
+ throw InternalException("Error during WAL replay. Cannot add an index to a table that has been altered.");
1186
+ }
1187
+
1188
+ // now start incrementally building the index
1189
+ {
1190
+ IndexLock lock;
1191
+ index->InitializeLock(lock);
1192
+
1193
+ while (true) {
1194
+ intermediate.Reset();
1195
+ result.Reset();
1196
+ // scan a new chunk from the table to index
1197
+ CreateIndexScan(state, intermediate, TableScanType::TABLE_SCAN_COMMITTED_ROWS_OMIT_PERMANENTLY_DELETED);
1198
+ if (intermediate.size() == 0) {
1199
+ // finished scanning for index creation
1200
+ // release all locks
1201
+ break;
1202
+ }
1203
+ // resolve the expressions for this chunk
1204
+ index->ExecuteExpressions(intermediate, result);
1205
+
1206
+ // insert into the index
1207
+ if (!index->Insert(lock, result, intermediate.data[intermediate.ColumnCount() - 1])) {
1208
+ throw InternalException("Error during WAL replay. Can't create unique index, table contains "
1209
+ "duplicate data on indexed column(s).");
1210
+ }
1211
+ }
1212
+ }
1213
+ info->indexes.AddIndex(std::move(index));
1214
+ }
1215
+
1216
+ //===--------------------------------------------------------------------===//
1217
+ // Statistics
1218
+ //===--------------------------------------------------------------------===//
1156
1219
  unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, column_t column_id) {
1157
1220
  if (column_id == COLUMN_IDENTIFIER_ROW_ID) {
1158
1221
  return nullptr;
@@ -86,7 +86,7 @@ bool Index::IndexIsUpdated(const vector<PhysicalIndex> &column_ids) const {
86
86
  return false;
87
87
  }
88
88
 
89
- BlockPointer Index::Serialize(duckdb::MetaBlockWriter &writer) {
89
+ BlockPointer Index::Serialize(MetaBlockWriter &writer) {
90
90
  throw NotImplementedException("The implementation of this index serialization does not exist.");
91
91
  }
92
92
 
@@ -127,7 +127,7 @@ LocalTableStorage::LocalTableStorage(DataTable &table)
127
127
  unbound_expressions.push_back(expr->Copy());
128
128
  }
129
129
  indexes.AddIndex(make_unique<ART>(art.column_ids, art.table_io_manager, std::move(unbound_expressions),
130
- art.constraint_type, art.db, false));
130
+ art.constraint_type, art.db, true));
131
131
  }
132
132
  return false;
133
133
  });
@@ -27,8 +27,10 @@ ColumnData::ColumnData(BlockManager &block_manager, DataTableInfo &info, idx_t c
27
27
 
28
28
  ColumnData::ColumnData(ColumnData &other, idx_t start, ColumnData *parent)
29
29
  : block_manager(other.block_manager), info(other.info), column_index(other.column_index), start(start),
30
- type(std::move(other.type)), parent(parent), updates(std::move(other.updates)),
31
- version(parent ? parent->version + 1 : 0) {
30
+ type(std::move(other.type)), parent(parent), version(parent ? parent->version + 1 : 0) {
31
+ if (other.updates) {
32
+ updates = make_unique<UpdateSegment>(*other.updates, *this);
33
+ }
32
34
  idx_t offset = 0;
33
35
  for (auto segment = other.data.GetRootSegment(); segment; segment = segment->Next()) {
34
36
  auto &other = (ColumnSegment &)*segment;
@@ -36,6 +36,21 @@ UpdateSegment::UpdateSegment(ColumnData &column_data)
36
36
  this->statistics_update_function = GetStatisticsUpdateFunction(physical_type);
37
37
  }
38
38
 
39
+ UpdateSegment::UpdateSegment(UpdateSegment &other, ColumnData &owner)
40
+ : column_data(owner), root(std::move(other.root)), stats(std::move(other.stats)), type_size(other.type_size) {
41
+
42
+ this->heap.Move(other.heap);
43
+
44
+ initialize_update_function = other.initialize_update_function;
45
+ merge_update_function = other.merge_update_function;
46
+ fetch_update_function = other.fetch_update_function;
47
+ fetch_committed_function = other.fetch_committed_function;
48
+ fetch_committed_range = other.fetch_committed_range;
49
+ fetch_row_function = other.fetch_row_function;
50
+ rollback_update_function = other.rollback_update_function;
51
+ statistics_update_function = other.statistics_update_function;
52
+ }
53
+
39
54
  UpdateSegment::~UpdateSegment() {
40
55
  }
41
56
 
@@ -60,8 +60,7 @@ void TableIndexList::VerifyForeignKey(const vector<PhysicalIndex> &fk_keys, Data
60
60
  throw InternalException("Internal Foreign Key error: could not find index to verify...");
61
61
  }
62
62
  conflict_manager.SetIndexCount(1);
63
-
64
- index->LookupValues(chunk, conflict_manager);
63
+ index->CheckConstraintsForChunk(chunk, conflict_manager);
65
64
  }
66
65
 
67
66
  vector<column_t> TableIndexList::GetRequiredColumns() {
@@ -18,6 +18,8 @@
18
18
  #include "duckdb/storage/write_ahead_log.hpp"
19
19
  #include "duckdb/storage/storage_manager.hpp"
20
20
  #include "duckdb/main/attached_database.hpp"
21
+ #include "duckdb/execution/index/art/art.hpp"
22
+ #include "duckdb/catalog/catalog_entry/duck_index_entry.hpp"
21
23
 
22
24
  namespace duckdb {
23
25
 
@@ -154,6 +156,12 @@ void ReplayState::ReplayEntry(WALType entry_type) {
154
156
  case WALType::DROP_TABLE_MACRO:
155
157
  ReplayDropTableMacro();
156
158
  break;
159
+ case WALType::CREATE_INDEX:
160
+ ReplayCreateIndex();
161
+ break;
162
+ case WALType::DROP_INDEX:
163
+ ReplayDropIndex();
164
+ break;
157
165
  case WALType::USE_TABLE:
158
166
  ReplayUseTable();
159
167
  break;
@@ -379,6 +387,66 @@ void ReplayState::ReplayDropTableMacro() {
379
387
  catalog.DropEntry(context, &info);
380
388
  }
381
389
 
390
+ //===--------------------------------------------------------------------===//
391
+ // Replay Index
392
+ //===--------------------------------------------------------------------===//
393
+ void ReplayState::ReplayCreateIndex() {
394
+
395
+ auto info = IndexCatalogEntry::Deserialize(source, context);
396
+ if (deserialize_only) {
397
+ return;
398
+ }
399
+
400
+ // get the physical table to which we'll add the index
401
+ auto table = catalog.GetEntry<TableCatalogEntry>(context, info->schema, info->table->table_name);
402
+ auto &data_table = table->GetStorage();
403
+
404
+ // bind the parsed expressions
405
+ if (info->expressions.empty()) {
406
+ for (auto &parsed_expr : info->parsed_expressions) {
407
+ info->expressions.push_back(parsed_expr->Copy());
408
+ }
409
+ }
410
+ auto binder = Binder::CreateBinder(context);
411
+ auto expressions = binder->BindCreateIndexExpressions(table, info.get());
412
+
413
+ // create the empty index
414
+ unique_ptr<Index> index;
415
+ switch (info->index_type) {
416
+ case IndexType::ART: {
417
+ index = make_unique<ART>(info->column_ids, TableIOManager::Get(data_table), expressions, info->constraint_type,
418
+ data_table.db, true);
419
+ break;
420
+ }
421
+ default:
422
+ throw InternalException("Unimplemented index type");
423
+ }
424
+
425
+ // add the index to the catalog
426
+ auto index_entry = (DuckIndexEntry *)catalog.CreateIndex(context, info.get());
427
+ index_entry->index = index.get();
428
+ index_entry->info = data_table.info;
429
+ for (auto &parsed_expr : info->parsed_expressions) {
430
+ index_entry->parsed_expressions.push_back(parsed_expr->Copy());
431
+ }
432
+
433
+ // physically add the index to the data table storage
434
+ data_table.WALAddIndex(context, std::move(index), expressions);
435
+ }
436
+
437
+ void ReplayState::ReplayDropIndex() {
438
+
439
+ DropInfo info;
440
+ info.type = CatalogType::INDEX_ENTRY;
441
+ info.schema = source.Read<string>();
442
+ info.name = source.Read<string>();
443
+ if (deserialize_only) {
444
+ return;
445
+ }
446
+
447
+ catalog.DropEntry(context, &info);
448
+ }
449
+
382
450
  //===--------------------------------------------------------------------===//
383
451
  // Replay Data
384
452
  //===--------------------------------------------------------------------===//
@@ -119,7 +119,7 @@ void WriteAheadLog::WriteSequenceValue(SequenceCatalogEntry *entry, SequenceValu
119
119
  }
120
120
 
121
121
  //===--------------------------------------------------------------------===//
122
- // MACRO'S
122
+ // MACROS
123
123
  //===--------------------------------------------------------------------===//
124
124
  void WriteAheadLog::WriteCreateMacro(ScalarMacroCatalogEntry *entry) {
125
125
  if (skip_writing) {
@@ -155,6 +155,26 @@ void WriteAheadLog::WriteDropTableMacro(TableMacroCatalogEntry *entry) {
155
155
  writer->WriteString(entry->name);
156
156
  }
157
157
 
158
+ //===--------------------------------------------------------------------===//
159
+ // Indexes
160
+ //===--------------------------------------------------------------------===//
161
+ void WriteAheadLog::WriteCreateIndex(IndexCatalogEntry *entry) {
162
+ if (skip_writing) {
163
+ return;
164
+ }
165
+ writer->Write<WALType>(WALType::CREATE_INDEX);
166
+ entry->Serialize(*writer);
167
+ }
168
+
169
+ void WriteAheadLog::WriteDropIndex(IndexCatalogEntry *entry) {
170
+ if (skip_writing) {
171
+ return;
172
+ }
173
+ writer->Write<WALType>(WALType::DROP_INDEX);
174
+ writer->WriteString(entry->schema->name);
175
+ writer->WriteString(entry->name);
176
+ }
177
+
158
178
  //===--------------------------------------------------------------------===//
159
179
  // Custom Types
160
180
  //===--------------------------------------------------------------------===//
@@ -87,7 +87,9 @@ void CommitState::WriteCatalogEntry(CatalogEntry *entry, data_ptr_t dataptr) {
87
87
  case CatalogType::TABLE_MACRO_ENTRY:
88
88
  log->WriteCreateTableMacro((TableMacroCatalogEntry *)parent);
89
89
  break;
90
-
90
+ case CatalogType::INDEX_ENTRY:
91
+ log->WriteCreateIndex((IndexCatalogEntry *)parent);
92
+ break;
91
93
  case CatalogType::TYPE_ENTRY:
92
94
  log->WriteCreateType((TypeCatalogEntry *)parent);
93
95
  break;
@@ -119,6 +121,8 @@ void CommitState::WriteCatalogEntry(CatalogEntry *entry, data_ptr_t dataptr) {
119
121
  log->WriteDropType((TypeCatalogEntry *)entry);
120
122
  break;
121
123
  case CatalogType::INDEX_ENTRY:
124
+ log->WriteDropIndex((IndexCatalogEntry *)entry);
125
+ break;
122
126
  case CatalogType::PREPARED_STATEMENT:
123
127
  case CatalogType::SCALAR_FUNCTION_ENTRY:
124
128
  // do nothing, prepared statements/functions aren't persisted to disk
@@ -127,7 +131,6 @@ void CommitState::WriteCatalogEntry(CatalogEntry *entry, data_ptr_t dataptr) {
127
131
  throw InternalException("Don't know how to drop this type!");
128
132
  }
129
133
  break;
130
- case CatalogType::INDEX_ENTRY:
131
134
  case CatalogType::PREPARED_STATEMENT:
132
135
  case CatalogType::AGGREGATE_FUNCTION_ENTRY:
133
136
  case CatalogType::SCALAR_FUNCTION_ENTRY:
@@ -24,8 +24,8 @@ template<typename T, typename Traits = ConcurrentQueueDefaultTraits>
24
24
  class BlockingConcurrentQueue
25
25
  {
26
26
  private:
27
- typedef ::duckdb_moodycamelmoodycamel::ConcurrentQueue<T, Traits> ConcurrentQueue;
28
- typedef ::duckdb_moodycamelmoodycamel::LightweightSemaphore LightweightSemaphore;
27
+ typedef ::duckdb_moodycamel::ConcurrentQueue<T, Traits> ConcurrentQueue;
28
+ typedef ::duckdb_moodycamel::LightweightSemaphore LightweightSemaphore;
29
29
 
30
30
  public:
31
31
  typedef typename ConcurrentQueue::producer_token_t producer_token_t;
@@ -359,12 +359,11 @@ using wstring_view = basic_string_view<wchar_t>;
359
359
  #if FMT_HAS_FEATURE(__cpp_char8_t)
360
360
  typedef char8_t fmt_char8_t;
361
361
  #else
362
- typedef unsigned char fmt_char8_t;
362
+ typedef char fmt_char8_t;
363
363
  #endif
364
364
 
365
365
  /** Specifies if ``T`` is a character type. Can be specialized by users. */
366
366
  template <typename T> struct is_char : std::false_type {};
367
- template <> struct is_char<char> : std::true_type {};
368
367
  template <> struct is_char<wchar_t> : std::true_type {};
369
368
  template <> struct is_char<fmt_char8_t> : std::true_type {};
370
369
  template <> struct is_char<char16_t> : std::true_type {};
@@ -422,6 +422,7 @@ typedef enum PGNodeTag {
422
422
  T_PGExportStmt,
423
423
  T_PGImportStmt,
424
424
  T_PGAttachStmt,
425
+ T_PGDetachStmt,
425
426
  T_PGCreateDatabaseStmt,
426
427
  T_PGUseStmt,
427
428
 
@@ -2070,6 +2070,20 @@ typedef struct PGAttachStmt
2070
2070
  PGNode *query;
2071
2071
  } PGAttachStmt;
2072
2072
 
2073
+ /* ----------------------
2074
+ * Detach Statement
2075
+ * ----------------------
2076
+ */
2077
+
2078
+ typedef struct PGDetachStmt
2079
+ {
2080
+ PGNodeTag type;
2081
+ char *db_name; /* name of the attached database to detach */
2082
+ bool missing_ok;
2083
+ } PGDetachStmt;
2084
+
2085
+
2086
+
2073
2087
  /* ----------------------
2074
2088
  * CREATE DATABASE Statement
2075
2089
  * ----------------------