duckdb 1.4.2-dev4.0 → 1.4.3-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/package.json +2 -2
  2. package/src/duckdb/extension/icu/icu_extension.cpp +67 -6
  3. package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +9 -3
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +12 -0
  5. package/src/duckdb/extension/json/json_functions/json_create.cpp +10 -10
  6. package/src/duckdb/extension/parquet/decoder/delta_length_byte_array_decoder.cpp +19 -5
  7. package/src/duckdb/extension/parquet/include/decoder/delta_length_byte_array_decoder.hpp +1 -1
  8. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +11 -2
  9. package/src/duckdb/extension/parquet/include/reader/string_column_reader.hpp +2 -1
  10. package/src/duckdb/extension/parquet/parquet_reader.cpp +3 -1
  11. package/src/duckdb/extension/parquet/parquet_writer.cpp +16 -1
  12. package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
  13. package/src/duckdb/extension/parquet/writer/primitive_column_writer.cpp +1 -1
  14. package/src/duckdb/src/catalog/default/default_table_functions.cpp +1 -1
  15. package/src/duckdb/src/common/adbc/adbc.cpp +8 -6
  16. package/src/duckdb/src/common/csv_writer.cpp +1 -13
  17. package/src/duckdb/src/common/encryption_key_manager.cpp +10 -9
  18. package/src/duckdb/src/common/enum_util.cpp +19 -0
  19. package/src/duckdb/src/common/enums/compression_type.cpp +51 -16
  20. package/src/duckdb/src/common/exception/binder_exception.cpp +7 -2
  21. package/src/duckdb/src/common/progress_bar/unscented_kalman_filter.cpp +2 -2
  22. package/src/duckdb/src/common/random_engine.cpp +10 -0
  23. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +13 -2
  24. package/src/duckdb/src/execution/index/art/art.cpp +6 -3
  25. package/src/duckdb/src/execution/index/bound_index.cpp +32 -21
  26. package/src/duckdb/src/execution/index/unbound_index.cpp +20 -9
  27. package/src/duckdb/src/execution/join_hashtable.cpp +9 -3
  28. package/src/duckdb/src/execution/operator/helper/physical_buffered_batch_collector.cpp +1 -1
  29. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
  30. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +5 -0
  31. package/src/duckdb/src/function/cast/cast_function_set.cpp +3 -1
  32. package/src/duckdb/src/function/macro_function.cpp +1 -1
  33. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +1 -1
  34. package/src/duckdb/src/function/scalar/create_sort_key.cpp +5 -3
  35. package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +1 -1
  36. package/src/duckdb/src/function/scalar/system/parse_log_message.cpp +4 -2
  37. package/src/duckdb/src/function/table/copy_csv.cpp +28 -4
  38. package/src/duckdb/src/function/table/direct_file_reader.cpp +10 -0
  39. package/src/duckdb/src/function/table/read_file.cpp +65 -1
  40. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  41. package/src/duckdb/src/include/duckdb/common/csv_writer.hpp +0 -3
  42. package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +2 -0
  43. package/src/duckdb/src/include/duckdb/common/encryption_state.hpp +5 -0
  44. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  45. package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +42 -2
  46. package/src/duckdb/src/include/duckdb/common/http_util.hpp +7 -0
  47. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +0 -11
  49. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +2 -0
  50. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +1 -0
  51. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +6 -6
  52. package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +115 -97
  53. package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +54 -0
  54. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +21 -2
  55. package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +26 -8
  56. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +2 -0
  57. package/src/duckdb/src/include/duckdb/function/table/read_file.hpp +0 -49
  58. package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +1 -1
  59. package/src/duckdb/src/include/duckdb/logging/log_type.hpp +14 -0
  60. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +2 -1
  61. package/src/duckdb/src/include/duckdb/main/buffered_data/batched_buffered_data.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/main/buffered_data/buffered_data.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/main/buffered_data/simple_buffered_data.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  65. package/src/duckdb/src/include/duckdb/main/database.hpp +2 -2
  66. package/src/duckdb/src/include/duckdb/main/database_file_path_manager.hpp +10 -6
  67. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +4 -0
  68. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +1 -0
  69. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +1 -0
  70. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -0
  71. package/src/duckdb/src/include/duckdb/main/relation/insert_relation.hpp +2 -0
  72. package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/main/relation.hpp +10 -2
  74. package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
  75. package/src/duckdb/src/include/duckdb/optimizer/filter_pullup.hpp +10 -14
  76. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +5 -1
  77. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +3 -0
  78. package/src/duckdb/src/include/duckdb/planner/bound_statement.hpp +1 -0
  79. package/src/duckdb/src/include/duckdb/storage/block.hpp +9 -0
  80. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +9 -2
  81. package/src/duckdb/src/include/duckdb/storage/index.hpp +8 -2
  82. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  83. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +0 -7
  85. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +6 -2
  86. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +6 -0
  87. package/src/duckdb/src/logging/log_manager.cpp +2 -1
  88. package/src/duckdb/src/logging/log_types.cpp +30 -1
  89. package/src/duckdb/src/main/attached_database.cpp +4 -7
  90. package/src/duckdb/src/main/buffered_data/batched_buffered_data.cpp +2 -3
  91. package/src/duckdb/src/main/buffered_data/buffered_data.cpp +2 -3
  92. package/src/duckdb/src/main/buffered_data/simple_buffered_data.cpp +1 -2
  93. package/src/duckdb/src/main/capi/prepared-c.cpp +9 -2
  94. package/src/duckdb/src/main/config.cpp +6 -5
  95. package/src/duckdb/src/main/database.cpp +9 -3
  96. package/src/duckdb/src/main/database_file_path_manager.cpp +43 -14
  97. package/src/duckdb/src/main/database_manager.cpp +1 -1
  98. package/src/duckdb/src/main/http/http_util.cpp +19 -1
  99. package/src/duckdb/src/main/profiling_info.cpp +11 -0
  100. package/src/duckdb/src/main/query_profiler.cpp +16 -0
  101. package/src/duckdb/src/main/relation/create_table_relation.cpp +9 -0
  102. package/src/duckdb/src/main/relation/insert_relation.cpp +7 -0
  103. package/src/duckdb/src/main/relation/table_relation.cpp +14 -0
  104. package/src/duckdb/src/main/relation.cpp +28 -12
  105. package/src/duckdb/src/main/settings/custom_settings.cpp +9 -3
  106. package/src/duckdb/src/optimizer/filter_pullup.cpp +14 -0
  107. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +29 -10
  108. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +7 -0
  109. package/src/duckdb/src/parallel/task_executor.cpp +4 -2
  110. package/src/duckdb/src/parser/query_node/cte_node.cpp +79 -0
  111. package/src/duckdb/src/parser/transform/expression/transform_cast.cpp +3 -1
  112. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +1 -0
  113. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +12 -4
  114. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +16 -12
  115. package/src/duckdb/src/planner/binder/statement/bind_merge_into.cpp +42 -5
  116. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +0 -24
  117. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +1 -1
  118. package/src/duckdb/src/planner/binder.cpp +0 -1
  119. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +1 -2
  120. package/src/duckdb/src/storage/buffer/block_manager.cpp +20 -6
  121. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +8 -6
  122. package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -22
  123. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +7 -0
  124. package/src/duckdb/src/storage/compression/zstd.cpp +34 -12
  125. package/src/duckdb/src/storage/data_table.cpp +1 -1
  126. package/src/duckdb/src/storage/local_storage.cpp +15 -2
  127. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -6
  128. package/src/duckdb/src/storage/metadata/metadata_reader.cpp +11 -15
  129. package/src/duckdb/src/storage/metadata/metadata_writer.cpp +1 -1
  130. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +1 -19
  131. package/src/duckdb/src/storage/single_file_block_manager.cpp +33 -3
  132. package/src/duckdb/src/storage/standard_buffer_manager.cpp +3 -1
  133. package/src/duckdb/src/storage/storage_info.cpp +4 -0
  134. package/src/duckdb/src/storage/storage_manager.cpp +8 -0
  135. package/src/duckdb/src/storage/table/array_column_data.cpp +1 -1
  136. package/src/duckdb/src/storage/table/column_data.cpp +3 -2
  137. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -2
  138. package/src/duckdb/src/storage/table/row_group.cpp +41 -24
  139. package/src/duckdb/src/storage/table/row_group_collection.cpp +114 -11
  140. package/src/duckdb/src/storage/table_index_list.cpp +18 -5
  141. package/src/duckdb/src/transaction/cleanup_state.cpp +7 -2
  142. package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +5 -0
  143. package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +8 -21
  144. package/src/duckdb/third_party/parquet/parquet_types.cpp +57 -35
  145. package/src/duckdb/third_party/parquet/parquet_types.h +9 -2
  146. package/src/duckdb/ub_src_common_types_row.cpp +0 -2
@@ -16,15 +16,28 @@ namespace duckdb {
16
16
 
17
17
  class ColumnDataCollection;
18
18
 
19
+ enum class BufferedIndexReplay : uint8_t { INSERT_ENTRY = 0, DEL_ENTRY = 1 };
20
+
21
+ struct BufferedIndexData {
22
+ BufferedIndexReplay type;
23
+ unique_ptr<ColumnDataCollection> data;
24
+
25
+ BufferedIndexData(BufferedIndexReplay replay_type, unique_ptr<ColumnDataCollection> data_p);
26
+ };
27
+
19
28
  class UnboundIndex final : public Index {
20
29
  private:
21
30
  //! The CreateInfo of the index.
22
31
  unique_ptr<CreateInfo> create_info;
23
32
  //! The serialized storage information of the index.
24
33
  IndexStorageInfo storage_info;
25
- //! Buffer for WAL replay appends.
26
- unique_ptr<ColumnDataCollection> buffered_appends;
27
- //! Maps the column IDs in the buffered appends to the table columns.
34
+ //! Buffer for WAL replays.
35
+ vector<BufferedIndexData> buffered_replays;
36
+
37
+ //! Maps the column IDs in the buffered replays to a physical table offset.
38
+ //! For example, column [i] in a buffered ColumnDataCollection is the data for an Indexed column with
39
+ //! physical table index mapped_column_ids[i].
40
+ //! This is in sorted order of physical column IDs.
28
41
  vector<StorageIndex> mapped_column_ids;
29
42
 
30
43
  public:
@@ -59,12 +72,17 @@ public:
59
72
 
60
73
  void CommitDrop() override;
61
74
 
62
- void BufferChunk(DataChunk &chunk, Vector &row_ids, const vector<StorageIndex> &mapped_column_ids_p);
63
- bool HasBufferedAppends() const {
64
- return buffered_appends != nullptr;
75
+ //! Buffer Index delete or insert (replay_type) data chunk.
76
+ //! See note above on mapped_column_ids, this function assumes that index_column_chunk maps into
77
+ //! mapped_column_ids_p to get the physical column index for each Indexed column in the chunk.
78
+ void BufferChunk(DataChunk &index_column_chunk, Vector &row_ids, const vector<StorageIndex> &mapped_column_ids_p,
79
+ BufferedIndexReplay replay_type);
80
+ bool HasBufferedReplays() const {
81
+ return !buffered_replays.empty();
65
82
  }
66
- ColumnDataCollection &GetBufferedAppends() const {
67
- return *buffered_appends;
83
+
84
+ vector<BufferedIndexData> &GetBufferedReplays() {
85
+ return buffered_replays;
68
86
  }
69
87
  const vector<StorageIndex> &GetMappedColumnIds() const {
70
88
  return mapped_column_ids;
@@ -277,6 +277,8 @@ public:
277
277
  uint64_t bitmask = DConstants::INVALID_INDEX;
278
278
  //! Whether or not we error on multiple rows found per match in a SINGLE join
279
279
  bool single_join_error_on_multiple_rows = true;
280
+ //! Number of probe matches
281
+ atomic<idx_t> total_probe_matches {0};
280
282
 
281
283
  struct {
282
284
  mutex mj_lock;
@@ -31,53 +31,4 @@ struct ReadFileGlobalState : public GlobalTableFunctionState {
31
31
  bool requires_file_open = false;
32
32
  };
33
33
 
34
- struct ReadBlobOperation {
35
- static constexpr const char *NAME = "read_blob";
36
- static constexpr const char *FILE_TYPE = "blob";
37
-
38
- static inline LogicalType TYPE() {
39
- return LogicalType::BLOB;
40
- }
41
- };
42
-
43
- struct ReadTextOperation {
44
- static constexpr const char *NAME = "read_text";
45
- static constexpr const char *FILE_TYPE = "text";
46
-
47
- static inline LogicalType TYPE() {
48
- return LogicalType::VARCHAR;
49
- }
50
- };
51
-
52
- template <class OP>
53
- struct DirectMultiFileInfo : MultiFileReaderInterface {
54
- static unique_ptr<MultiFileReaderInterface> CreateInterface(ClientContext &context);
55
- unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context,
56
- optional_ptr<TableFunctionInfo> info) override;
57
- bool ParseCopyOption(ClientContext &context, const string &key, const vector<Value> &values,
58
- BaseFileReaderOptions &options, vector<string> &expected_names,
59
- vector<LogicalType> &expected_types) override;
60
- bool ParseOption(ClientContext &context, const string &key, const Value &val, MultiFileOptions &file_options,
61
- BaseFileReaderOptions &options) override;
62
- unique_ptr<TableFunctionData> InitializeBindData(MultiFileBindData &multi_file_data,
63
- unique_ptr<BaseFileReaderOptions> options) override;
64
- void BindReader(ClientContext &context, vector<LogicalType> &return_types, vector<string> &names,
65
- MultiFileBindData &bind_data) override;
66
- optional_idx MaxThreads(const MultiFileBindData &bind_data_p, const MultiFileGlobalState &global_state,
67
- FileExpandResult expand_result) override;
68
- unique_ptr<GlobalTableFunctionState> InitializeGlobalState(ClientContext &context, MultiFileBindData &bind_data,
69
- MultiFileGlobalState &global_state) override;
70
- unique_ptr<LocalTableFunctionState> InitializeLocalState(ExecutionContext &, GlobalTableFunctionState &) override;
71
- shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
72
- BaseUnionData &union_data, const MultiFileBindData &bind_data_p) override;
73
- shared_ptr<BaseFileReader> CreateReader(ClientContext &context, GlobalTableFunctionState &gstate,
74
- const OpenFileInfo &file, idx_t file_idx,
75
- const MultiFileBindData &bind_data) override;
76
- shared_ptr<BaseFileReader> CreateReader(ClientContext &context, const OpenFileInfo &file,
77
- BaseFileReaderOptions &options,
78
- const MultiFileOptions &file_options) override;
79
- unique_ptr<NodeStatistics> GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override;
80
- FileGlobInput GetGlobInput() override;
81
- };
82
-
83
34
  } // namespace duckdb
@@ -21,7 +21,7 @@ class LogType;
21
21
  // - Creates Loggers with cached configuration
22
22
  // - Main sink for logs (either by logging directly into this, or by syncing a pre-cached set of log entries)
23
23
  // - Holds the log storage
24
- class LogManager : public enable_shared_from_this<LogManager> {
24
+ class LogManager {
25
25
  friend class ThreadSafeLogger;
26
26
  friend class ThreadLocalLogger;
27
27
  friend class MutableLogger;
@@ -20,6 +20,7 @@ class PhysicalOperator;
20
20
  class AttachedDatabase;
21
21
  class RowGroup;
22
22
  struct DataTableInfo;
23
+ enum class MetricsType : uint8_t;
23
24
 
24
25
  //! Log types provide some structure to the formats that the different log messages can have
25
26
  //! For now, this holds a type that the VARCHAR value will be auto-cast into.
@@ -106,6 +107,19 @@ public:
106
107
  const vector<pair<string, string>> &info);
107
108
  };
108
109
 
110
+ class MetricsLogType : public LogType {
111
+ public:
112
+ static constexpr const char *NAME = "Metrics";
113
+ static constexpr LogLevel LEVEL = LogLevel::LOG_INFO;
114
+
115
+ //! Construct the log type
116
+ MetricsLogType();
117
+
118
+ static LogicalType GetLogType();
119
+
120
+ static string ConstructLogMessage(const MetricsType &type, const Value &value);
121
+ };
122
+
109
123
  class CheckpointLogType : public LogType {
110
124
  public:
111
125
  static constexpr const char *NAME = "Checkpoint";
@@ -35,9 +35,10 @@ enum class AttachedDatabaseType {
35
35
  class DatabaseFilePathManager;
36
36
 
37
37
  struct StoredDatabasePath {
38
- StoredDatabasePath(DatabaseFilePathManager &manager, string path, const string &name);
38
+ StoredDatabasePath(DatabaseManager &db_manager, DatabaseFilePathManager &manager, string path, const string &name);
39
39
  ~StoredDatabasePath();
40
40
 
41
+ DatabaseManager &db_manager;
41
42
  DatabaseFilePathManager &manager;
42
43
  string path;
43
44
 
@@ -32,7 +32,7 @@ public:
32
32
  static constexpr const BufferedData::Type TYPE = BufferedData::Type::BATCHED;
33
33
 
34
34
  public:
35
- explicit BatchedBufferedData(weak_ptr<ClientContext> context);
35
+ explicit BatchedBufferedData(ClientContext &context);
36
36
 
37
37
  public:
38
38
  void Append(const DataChunk &chunk, idx_t batch);
@@ -28,7 +28,7 @@ protected:
28
28
  enum class Type { SIMPLE, BATCHED };
29
29
 
30
30
  public:
31
- BufferedData(Type type, weak_ptr<ClientContext> context_p);
31
+ BufferedData(Type type, ClientContext &context);
32
32
  virtual ~BufferedData();
33
33
 
34
34
  public:
@@ -24,7 +24,7 @@ public:
24
24
  static constexpr const BufferedData::Type TYPE = BufferedData::Type::SIMPLE;
25
25
 
26
26
  public:
27
- explicit SimpleBufferedData(weak_ptr<ClientContext> context);
27
+ explicit SimpleBufferedData(ClientContext &context);
28
28
  ~SimpleBufferedData() override;
29
29
 
30
30
  public:
@@ -51,6 +51,8 @@ struct PreparedStatementWrapper {
51
51
  //! Map of name -> values
52
52
  case_insensitive_map_t<BoundParameterData> values;
53
53
  unique_ptr<PreparedStatement> statement;
54
+ bool success = true;
55
+ ErrorData error_data;
54
56
  };
55
57
 
56
58
  struct ExtractStatementsWrapper {
@@ -69,7 +69,7 @@ public:
69
69
 
70
70
  DUCKDB_API SettingLookupResult TryGetCurrentSetting(const string &key, Value &result) const;
71
71
 
72
- DUCKDB_API shared_ptr<EncryptionUtil> GetEncryptionUtil() const;
72
+ DUCKDB_API shared_ptr<EncryptionUtil> GetEncryptionUtil();
73
73
 
74
74
  shared_ptr<AttachedDatabase> CreateAttachedDatabase(ClientContext &context, AttachInfo &info,
75
75
  AttachOptions &options);
@@ -90,7 +90,7 @@ private:
90
90
  unique_ptr<ExtensionManager> extension_manager;
91
91
  ValidChecker db_validity;
92
92
  unique_ptr<DatabaseFileSystem> db_file_system;
93
- shared_ptr<LogManager> log_manager;
93
+ unique_ptr<LogManager> log_manager;
94
94
  unique_ptr<ExternalFileCache> external_file_cache;
95
95
 
96
96
  duckdb_ext_api_v1 (*create_api_v1)();
@@ -12,31 +12,35 @@
12
12
  #include "duckdb/common/mutex.hpp"
13
13
  #include "duckdb/common/case_insensitive_map.hpp"
14
14
  #include "duckdb/common/enums/on_create_conflict.hpp"
15
+ #include "duckdb/common/enums/access_mode.hpp"
16
+ #include "duckdb/common/reference_map.hpp"
15
17
 
16
18
  namespace duckdb {
17
19
  struct AttachInfo;
18
20
  struct AttachOptions;
21
+ class DatabaseManager;
19
22
 
20
23
  enum class InsertDatabasePathResult { SUCCESS, ALREADY_EXISTS };
21
24
 
22
25
  struct DatabasePathInfo {
23
- explicit DatabasePathInfo(string name_p) : name(std::move(name_p)), is_attached(true) {
24
- }
26
+ DatabasePathInfo(DatabaseManager &manager, string name_p, AccessMode access_mode);
25
27
 
26
28
  string name;
27
- bool is_attached;
29
+ AccessMode access_mode;
30
+ reference_set_t<DatabaseManager> attached_databases;
31
+ idx_t reference_count = 1;
28
32
  };
29
33
 
30
34
  //! The DatabaseFilePathManager is used to ensure we only ever open a single database file once
31
35
  class DatabaseFilePathManager {
32
36
  public:
33
37
  idx_t ApproxDatabaseCount() const;
34
- InsertDatabasePathResult InsertDatabasePath(const string &path, const string &name, OnCreateConflict on_conflict,
35
- AttachOptions &options);
38
+ InsertDatabasePathResult InsertDatabasePath(DatabaseManager &manager, const string &path, const string &name,
39
+ OnCreateConflict on_conflict, AttachOptions &options);
36
40
  //! Erase a database path - indicating we are done with using it
37
41
  void EraseDatabasePath(const string &path);
38
42
  //! Called when a database is detached, but before it is fully finished being used
39
- void DetachDatabase(const string &path);
43
+ void DetachDatabase(DatabaseManager &manager, const string &path);
40
44
 
41
45
  private:
42
46
  //! The lock to add entries to the database path map
@@ -227,6 +227,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
227
227
  {"iceberg_metadata", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
228
228
  {"iceberg_scan", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
229
229
  {"iceberg_snapshots", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
230
+ {"iceberg_table_properties", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
230
231
  {"iceberg_to_ducklake", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
231
232
  {"icu_calendar_names", "icu", CatalogType::TABLE_FUNCTION_ENTRY},
232
233
  {"icu_collate_af", "icu", CatalogType::SCALAR_FUNCTION_ENTRY},
@@ -525,6 +526,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
525
526
  {"regr_sxx", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY},
526
527
  {"regr_sxy", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY},
527
528
  {"regr_syy", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY},
529
+ {"remove_iceberg_table_properties", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
528
530
  {"repeat", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
529
531
  {"replace", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
530
532
  {"replace_type", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
@@ -540,6 +542,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
540
542
  {"rtrim", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
541
543
  {"sem", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY},
542
544
  {"set_bit", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
545
+ {"set_iceberg_table_properties", "iceberg", CatalogType::TABLE_FUNCTION_ENTRY},
543
546
  {"setseed", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
544
547
  {"shapefile_meta", "spatial", CatalogType::TABLE_FUNCTION_ENTRY},
545
548
  {"sign", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY},
@@ -599,6 +602,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = {
599
602
  {"st_envelope", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
600
603
  {"st_envelope_agg", "spatial", CatalogType::AGGREGATE_FUNCTION_ENTRY},
601
604
  {"st_equals", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
605
+ {"st_expand", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
602
606
  {"st_extent", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
603
607
  {"st_extent_agg", "spatial", CatalogType::AGGREGATE_FUNCTION_ENTRY},
604
608
  {"st_extent_approx", "spatial", CatalogType::SCALAR_FUNCTION_ENTRY},
@@ -56,6 +56,7 @@ public:
56
56
 
57
57
  public:
58
58
  string GetMetricAsString(const MetricsType metric) const;
59
+ void WriteMetricsToLog(ClientContext &context);
59
60
  void WriteMetricsToJSON(duckdb_yyjson::yyjson_mut_doc *doc, duckdb_yyjson::yyjson_mut_val *destination);
60
61
 
61
62
  public:
@@ -183,6 +183,7 @@ public:
183
183
  static InsertionOrderPreservingMap<string> JSONSanitize(const InsertionOrderPreservingMap<string> &input);
184
184
  static string JSONSanitize(const string &text);
185
185
  static string DrawPadded(const string &str, idx_t width);
186
+ DUCKDB_API void ToLog() const;
186
187
  DUCKDB_API string ToJSON() const;
187
188
  DUCKDB_API void WriteToFile(const char *path, string &info) const;
188
189
 
@@ -16,8 +16,11 @@ class CreateTableRelation : public Relation {
16
16
  public:
17
17
  CreateTableRelation(shared_ptr<Relation> child, string schema_name, string table_name, bool temporary,
18
18
  OnCreateConflict on_conflict);
19
+ CreateTableRelation(shared_ptr<Relation> child, string catalog_name, string schema_name, string table_name,
20
+ bool temporary, OnCreateConflict on_conflict);
19
21
 
20
22
  shared_ptr<Relation> child;
23
+ string catalog_name;
21
24
  string schema_name;
22
25
  string table_name;
23
26
  vector<ColumnDefinition> columns;
@@ -15,8 +15,10 @@ namespace duckdb {
15
15
  class InsertRelation : public Relation {
16
16
  public:
17
17
  InsertRelation(shared_ptr<Relation> child, string schema_name, string table_name);
18
+ InsertRelation(shared_ptr<Relation> child, string catalog_name, string schema_name, string table_name);
18
19
 
19
20
  shared_ptr<Relation> child;
21
+ string catalog_name;
20
22
  string schema_name;
21
23
  string table_name;
22
24
  vector<ColumnDefinition> columns;
@@ -29,6 +29,8 @@ public:
29
29
 
30
30
  unique_ptr<TableRef> GetTableRef() override;
31
31
 
32
+ void Insert(const vector<vector<Value>> &values) override;
33
+ void Insert(vector<vector<unique_ptr<ParsedExpression>>> &&expressions) override;
32
34
  void Update(const string &update, const string &condition = string()) override;
33
35
  void Update(vector<string> column_names, vector<unique_ptr<ParsedExpression>> &&update,
34
36
  unique_ptr<ParsedExpression> condition = nullptr) override;
@@ -162,19 +162,27 @@ public:
162
162
 
163
163
  //! Insert the data from this relation into a table
164
164
  DUCKDB_API shared_ptr<Relation> InsertRel(const string &schema_name, const string &table_name);
165
+ DUCKDB_API shared_ptr<Relation> InsertRel(const string &catalog_name, const string &schema_name,
166
+ const string &table_name);
165
167
  DUCKDB_API void Insert(const string &table_name);
166
168
  DUCKDB_API void Insert(const string &schema_name, const string &table_name);
169
+ DUCKDB_API void Insert(const string &catalog_name, const string &schema_name, const string &table_name);
167
170
  //! Insert a row (i.e.,list of values) into a table
168
- DUCKDB_API void Insert(const vector<vector<Value>> &values);
169
- DUCKDB_API void Insert(vector<vector<unique_ptr<ParsedExpression>>> &&expressions);
171
+ DUCKDB_API virtual void Insert(const vector<vector<Value>> &values);
172
+ DUCKDB_API virtual void Insert(vector<vector<unique_ptr<ParsedExpression>>> &&expressions);
170
173
  //! Create a table and insert the data from this relation into that table
171
174
  DUCKDB_API shared_ptr<Relation> CreateRel(const string &schema_name, const string &table_name,
172
175
  bool temporary = false,
173
176
  OnCreateConflict on_conflict = OnCreateConflict::ERROR_ON_CONFLICT);
177
+ DUCKDB_API shared_ptr<Relation> CreateRel(const string &catalog_name, const string &schema_name,
178
+ const string &table_name, bool temporary = false,
179
+ OnCreateConflict on_conflict = OnCreateConflict::ERROR_ON_CONFLICT);
174
180
  DUCKDB_API void Create(const string &table_name, bool temporary = false,
175
181
  OnCreateConflict on_conflict = OnCreateConflict::ERROR_ON_CONFLICT);
176
182
  DUCKDB_API void Create(const string &schema_name, const string &table_name, bool temporary = false,
177
183
  OnCreateConflict on_conflict = OnCreateConflict::ERROR_ON_CONFLICT);
184
+ DUCKDB_API void Create(const string &catalog_name, const string &schema_name, const string &table_name,
185
+ bool temporary = false, OnCreateConflict on_conflict = OnCreateConflict::ERROR_ON_CONFLICT);
178
186
 
179
187
  //! Write a relation to a CSV file
180
188
  DUCKDB_API shared_ptr<Relation>
@@ -337,6 +337,15 @@ struct DebugSkipCheckpointOnCommitSetting {
337
337
  static constexpr SetScope DefaultScope = SetScope::GLOBAL;
338
338
  };
339
339
 
340
+ struct DebugVerifyBlocksSetting {
341
+ using RETURN_TYPE = bool;
342
+ static constexpr const char *Name = "debug_verify_blocks";
343
+ static constexpr const char *Description = "DEBUG SETTING: verify block metadata during checkpointing";
344
+ static constexpr const char *InputType = "BOOLEAN";
345
+ static constexpr const char *DefaultValue = "false";
346
+ static constexpr SetScope DefaultScope = SetScope::GLOBAL;
347
+ };
348
+
340
349
  struct DebugVerifyVectorSetting {
341
350
  using RETURN_TYPE = DebugVectorVerification;
342
351
  static constexpr const char *Name = "debug_verify_vector";
@@ -30,7 +30,7 @@ private:
30
30
  // only pull up filters when there is a fork
31
31
  bool can_pullup = false;
32
32
 
33
- // identifiy case the branch is a set operation (INTERSECT or EXCEPT)
33
+ // identify case the branch is a set operation (INTERSECT or EXCEPT)
34
34
  bool can_add_column = false;
35
35
 
36
36
  private:
@@ -40,30 +40,26 @@ private:
40
40
 
41
41
  //! Pull up a LogicalFilter op
42
42
  unique_ptr<LogicalOperator> PullupFilter(unique_ptr<LogicalOperator> op);
43
-
44
43
  //! Pull up filter in a LogicalProjection op
45
44
  unique_ptr<LogicalOperator> PullupProjection(unique_ptr<LogicalOperator> op);
46
-
47
45
  //! Pull up filter in a LogicalCrossProduct op
48
46
  unique_ptr<LogicalOperator> PullupCrossProduct(unique_ptr<LogicalOperator> op);
49
-
47
+ //! Pullup a filter in a LogicalJoin
50
48
  unique_ptr<LogicalOperator> PullupJoin(unique_ptr<LogicalOperator> op);
51
-
52
- // PPullup filter in a left join
49
+ //! Pullup filter in a left join
53
50
  unique_ptr<LogicalOperator> PullupFromLeft(unique_ptr<LogicalOperator> op);
54
-
55
- // Pullup filter in a inner join
51
+ //! Pullup filter in an inner join
56
52
  unique_ptr<LogicalOperator> PullupInnerJoin(unique_ptr<LogicalOperator> op);
57
-
58
- // Pullup filter in LogicalIntersect or LogicalExcept op
53
+ //! Pullup filter through a distinct
54
+ unique_ptr<LogicalOperator> PullupDistinct(unique_ptr<LogicalOperator> op);
55
+ //! Pullup filter in LogicalIntersect or LogicalExcept op
59
56
  unique_ptr<LogicalOperator> PullupSetOperation(unique_ptr<LogicalOperator> op);
60
-
57
+ //! Pullup filter in both sides of a join
61
58
  unique_ptr<LogicalOperator> PullupBothSide(unique_ptr<LogicalOperator> op);
62
59
 
63
- // Finish pull up at this operator
60
+ //! Finish pull up at this operator
64
61
  unique_ptr<LogicalOperator> FinishPullup(unique_ptr<LogicalOperator> op);
65
-
66
- // special treatment for SetOperations and projections
62
+ //! special treatment for SetOperations and projections
67
63
  void ProjectSetOperation(LogicalProjection &proj);
68
64
 
69
65
  }; // end FilterPullup
@@ -56,7 +56,11 @@ public:
56
56
  //! Extract the set of relations referred to inside an expression
57
57
  bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
58
58
  void AddRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
59
-
59
+ //! Add an unnest relation which can come from a logical unnest or a logical get which has an unnest function
60
+ void AddUnnestRelation(JoinOrderOptimizer &optimizer, LogicalOperator &op, LogicalOperator &input_op,
61
+ optional_ptr<LogicalOperator> parent, RelationStats &child_stats,
62
+ optional_ptr<LogicalOperator> limit_op,
63
+ vector<reference<LogicalOperator>> &datasource_filters);
60
64
  void AddAggregateOrWindowRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent,
61
65
  const RelationStats &stats, LogicalOperatorType op_type);
62
66
  vector<unique_ptr<SingleJoinRelation>> GetRelations();
@@ -78,6 +78,9 @@ public:
78
78
  virtual void Serialize(Serializer &serializer) const;
79
79
  static unique_ptr<QueryNode> Deserialize(Deserializer &deserializer);
80
80
 
81
+ //! TEMPORARY BUG FIX WORKAROUND: extract elements from the CommonTableExpressionMap and construct CTENodes
82
+ static void ExtractCTENodes(unique_ptr<QueryNode> &query_node);
83
+
81
84
  protected:
82
85
  //! Copy base QueryNode properties from another expression to this one,
83
86
  //! used in Copy method
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/string.hpp"
12
+ #include "duckdb/common/unique_ptr.hpp"
12
13
  #include "duckdb/common/vector.hpp"
13
14
 
14
15
  namespace duckdb {
@@ -61,6 +61,15 @@ struct MetaBlockPointer {
61
61
  block_id_t GetBlockId() const;
62
62
  uint32_t GetBlockIndex() const;
63
63
 
64
+ bool operator==(const MetaBlockPointer &rhs) const {
65
+ return block_pointer == rhs.block_pointer && offset == rhs.offset;
66
+ }
67
+
68
+ friend std::ostream &operator<<(std::ostream &os, const MetaBlockPointer &obj) {
69
+ return os << "{block_id: " << obj.GetBlockId() << " index: " << obj.GetBlockIndex() << " offset: " << obj.offset
70
+ << "}";
71
+ }
72
+
64
73
  void Serialize(Serializer &serializer) const;
65
74
  static MetaBlockPointer Deserialize(Deserializer &source);
66
75
  };
@@ -24,6 +24,8 @@ class ClientContext;
24
24
  class DatabaseInstance;
25
25
  class MetadataManager;
26
26
 
27
+ enum class ConvertToPersistentMode { DESTRUCTIVE, THREAD_SAFE };
28
+
27
29
  //! BlockManager is an abstract representation to manage blocks on DuckDB. When writing or reading blocks, the
28
30
  //! BlockManager creates and accesses blocks. The concrete types implement specific block storage strategies.
29
31
  class BlockManager {
@@ -91,10 +93,15 @@ public:
91
93
  //! Register a block with the given block id in the base file
92
94
  shared_ptr<BlockHandle> RegisterBlock(block_id_t block_id);
93
95
  //! Convert an existing in-memory buffer into a persistent disk-backed block
96
+ //! If mode is set to destructive (default) - the old_block will be destroyed as part of this method
97
+ //! This can only be safely used when there is no other (lingering) usage of old_block
98
+ //! If there is concurrent usage of the block elsewhere - use the THREAD_SAFE mode which creates an extra copy
94
99
  shared_ptr<BlockHandle> ConvertToPersistent(QueryContext context, block_id_t block_id,
95
- shared_ptr<BlockHandle> old_block, BufferHandle old_handle);
100
+ shared_ptr<BlockHandle> old_block, BufferHandle old_handle,
101
+ ConvertToPersistentMode mode = ConvertToPersistentMode::DESTRUCTIVE);
96
102
  shared_ptr<BlockHandle> ConvertToPersistent(QueryContext context, block_id_t block_id,
97
- shared_ptr<BlockHandle> old_block);
103
+ shared_ptr<BlockHandle> old_block,
104
+ ConvertToPersistentMode mode = ConvertToPersistentMode::DESTRUCTIVE);
98
105
 
99
106
  void UnregisterBlock(BlockHandle &block);
100
107
  //! UnregisterBlock, only accepts non-temporary block ids
@@ -31,9 +31,15 @@ class Index {
31
31
  protected:
32
32
  Index(const vector<column_t> &column_ids, TableIOManager &table_io_manager, AttachedDatabase &db);
33
33
 
34
- //! The logical column ids of the indexed table
34
+ //! The physical column ids of the indexed columns.
35
+ //! For example, given a table with the following columns:
36
+ //! (a INT, gen AS (2 * a), b INT, c VARCHAR), an index on columns (a,c) would have physical
37
+ //! column_ids [0,2] (since the virtual column is skipped in the physical representation).
38
+ //! Also see comments in bound_index.hpp to see how these column IDs are used in the context of
39
+ //! bound/unbound expressions.
40
+ //! Note that these are the columns for this Index, not all Indexes on the table.
35
41
  vector<column_t> column_ids;
36
- //! Unordered set of column_ids used by the index
42
+ //! Unordered set of column_ids used by the Index
37
43
  unordered_set<column_t> column_id_set;
38
44
 
39
45
  public:
@@ -77,6 +77,8 @@ public:
77
77
  //! Flush all blocks to disk
78
78
  void Flush();
79
79
 
80
+ bool BlockHasBeenCleared(const MetaBlockPointer &ptr);
81
+
80
82
  void MarkBlocksAsModified();
81
83
  void ClearModifiedBlocks(const vector<MetaBlockPointer> &pointers);
82
84
 
@@ -52,7 +52,7 @@ private:
52
52
  MetadataManager &manager;
53
53
  BlockReaderType type;
54
54
  MetadataHandle block;
55
- MetadataPointer next_pointer;
55
+ MetaBlockPointer next_pointer;
56
56
  bool has_next_block;
57
57
  optional_ptr<vector<MetaBlockPointer>> read_pointers;
58
58
  idx_t index;
@@ -40,11 +40,4 @@ struct StorageOptions {
40
40
  void Initialize(const unordered_map<string, Value> &options);
41
41
  };
42
42
 
43
- inline void ClearUserKey(shared_ptr<string> const &encryption_key) {
44
- if (encryption_key && !encryption_key->empty()) {
45
- memset(&(*encryption_key)[0], 0, encryption_key->size());
46
- encryption_key->clear();
47
- }
48
- }
49
-
50
43
  } // namespace duckdb
@@ -65,7 +65,8 @@ struct RowGroupWriteInfo {
65
65
  struct RowGroupWriteData {
66
66
  vector<unique_ptr<ColumnCheckpointState>> states;
67
67
  vector<BaseStatistics> statistics;
68
- vector<MetaBlockPointer> existing_pointers;
68
+ bool reuse_existing_metadata_blocks = false;
69
+ vector<idx_t> existing_extra_metadata_blocks;
69
70
  };
70
71
 
71
72
  class RowGroup : public SegmentBase<RowGroup> {
@@ -94,7 +95,10 @@ public:
94
95
  return collection.get();
95
96
  }
96
97
 //! Returns the list of extra metadata blocks (NOTE(review): wording previously described GetColumnPointers - confirm)
97
- vector<MetaBlockPointer> GetColumnPointers();
98
+ vector<idx_t> GetOrComputeExtraMetadataBlocks(bool force_compute = false);
99
+
100
+ const vector<MetaBlockPointer> &GetColumnStartPointers() const;
101
+
98
102
  //! Returns the list of meta block pointers used by the deletes
99
103
  const vector<MetaBlockPointer> &GetDeletesPointers() const {
100
104
  return deletes_pointers;
@@ -18,6 +18,12 @@ public:
18
18
  optional_ptr<case_insensitive_map_t<BoundParameterData>> parameters);
19
19
  static unique_ptr<StatementVerifier> Create(const SQLStatement &statement,
20
20
  optional_ptr<case_insensitive_map_t<BoundParameterData>> parameters);
21
+
22
+ public:
23
+ // TEMPORARY FIX: work-around for CTE serialization for v1.4.X
24
+ bool RequireEquality() const override {
25
+ return false;
26
+ }
21
27
  };
22
28
 
23
29
  } // namespace duckdb