duckdb 0.7.1-dev341.0 → 0.7.1-dev407.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/include/json_scan.hpp +16 -0
  3. package/src/duckdb/extension/json/json_functions/json_structure.cpp +8 -3
  4. package/src/duckdb/extension/json/json_functions/json_transform.cpp +8 -2
  5. package/src/duckdb/extension/json/json_functions.cpp +1 -0
  6. package/src/duckdb/src/common/bind_helpers.cpp +55 -0
  7. package/src/duckdb/src/common/types.cpp +1 -1
  8. package/src/duckdb/src/execution/physical_operator.cpp +6 -6
  9. package/src/duckdb/src/function/table/system/duckdb_temporary_files.cpp +59 -0
  10. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  11. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  12. package/src/duckdb/src/include/duckdb/common/bind_helpers.hpp +2 -0
  13. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  14. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  15. package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
  16. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +0 -7
  17. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +8 -0
  18. package/src/duckdb/src/main/config.cpp +1 -0
  19. package/src/duckdb/src/main/database.cpp +10 -3
  20. package/src/duckdb/src/main/extension/extension_install.cpp +43 -9
  21. package/src/duckdb/src/main/extension/extension_load.cpp +7 -2
  22. package/src/duckdb/src/main/settings/settings.cpp +16 -0
  23. package/src/duckdb/src/parallel/pipeline_executor.cpp +1 -55
  24. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -11
  25. package/src/duckdb/src/storage/buffer_manager.cpp +75 -23
  26. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.1-dev341.0",
5
+ "version": "0.7.1-dev407.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -296,6 +296,21 @@ public:
296
296
  return lstate.GetBatchIndex();
297
297
  }
298
298
 
299
+ static unique_ptr<NodeStatistics> JSONScanCardinality(ClientContext &context, const FunctionData *bind_data) {
300
+ auto &data = (JSONScanData &)*bind_data;
301
+ idx_t per_file_cardinality;
302
+ if (data.stored_readers.empty()) {
303
+ // The cardinality of an unknown JSON file is the almighty number 42 except when it's not
304
+ per_file_cardinality = 42;
305
+ } else {
306
+ // If we multiply the almighty number 42 by 10, we get the exact average size of a JSON
307
+ // Not really, but the average size of a lineitem row in JSON is around 360 bytes
308
+ per_file_cardinality = data.stored_readers[0]->GetFileHandle().FileSize() / 420;
309
+ }
310
+ // Obviously this can be improved but this is better than defaulting to 0
311
+ return make_unique<NodeStatistics>(per_file_cardinality * data.file_paths.size());
312
+ }
313
+
299
314
  static void JSONScanSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const TableFunction &function) {
300
315
  auto &bind_data = (JSONScanData &)*bind_data_p;
301
316
  bind_data.Serialize(writer);
@@ -316,6 +331,7 @@ public:
316
331
 
317
332
  table_function.table_scan_progress = JSONScanProgress;
318
333
  table_function.get_batch_index = JSONScanGetBatchIndex;
334
+ table_function.cardinality = JSONScanCardinality;
319
335
 
320
336
  table_function.serialize = JSONScanSerialize;
321
337
  table_function.deserialize = JSONScanDeserialize;
@@ -214,9 +214,6 @@ void JSONStructureNode::RefineCandidateTypesObject(yyjson_val *vals[], idx_t cou
214
214
  }
215
215
  }
216
216
 
217
- if (count > STANDARD_VECTOR_SIZE) {
218
- string_vector.Initialize(false, count);
219
- }
220
217
  for (idx_t child_idx = 0; child_idx < child_count; child_idx++) {
221
218
  desc.children[child_idx].RefineCandidateTypes(child_vals[child_idx], count, string_vector, allocator,
222
219
  date_format_map);
@@ -431,6 +428,10 @@ static inline yyjson_mut_val *ConvertStructureArray(const JSONStructureNode &nod
431
428
  static inline yyjson_mut_val *ConvertStructureObject(const JSONStructureNode &node, yyjson_mut_doc *doc) {
432
429
  D_ASSERT(node.descriptions.size() == 1 && node.descriptions[0].type == LogicalTypeId::STRUCT);
433
430
  auto &desc = node.descriptions[0];
431
+ if (desc.children.empty()) {
432
+ // Empty struct - let's do JSON instead
433
+ return yyjson_mut_str(doc, JSONCommon::JSON_TYPE_NAME);
434
+ }
434
435
 
435
436
  auto obj = yyjson_mut_obj(doc);
436
437
  for (auto &child : desc.children) {
@@ -495,6 +496,10 @@ static LogicalType StructureToTypeObject(ClientContext &context, const JSONStruc
495
496
  idx_t depth) {
496
497
  D_ASSERT(node.descriptions.size() == 1 && node.descriptions[0].type == LogicalTypeId::STRUCT);
497
498
  auto &desc = node.descriptions[0];
499
+ if (desc.children.empty()) {
500
+ // Empty struct - let's do JSON instead
501
+ return JSONCommon::JSONType();
502
+ }
498
503
 
499
504
  child_list_t<LogicalType> child_types;
500
505
  child_types.reserve(desc.children.size());
@@ -58,6 +58,9 @@ static LogicalType StructureToTypeObject(yyjson_val *obj, ClientContext &context
58
58
  child_types.emplace_back(key_str, StructureStringToType(val, context));
59
59
  }
60
60
  D_ASSERT(yyjson_obj_size(obj) == names.size());
61
+ if (child_types.empty()) {
62
+ throw InvalidInputException("Empty object in JSON structure");
63
+ }
61
64
  return LogicalType::STRUCT(child_types);
62
65
  }
63
66
 
@@ -87,7 +90,7 @@ static unique_ptr<FunctionData> JSONTransformBind(ClientContext &context, Scalar
87
90
  } else {
88
91
  auto structure_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
89
92
  if (!structure_val.DefaultTryCastAs(JSONCommon::JSONType())) {
90
- throw InvalidInputException("cannot cast JSON structure to string");
93
+ throw InvalidInputException("Cannot cast JSON structure to string");
91
94
  }
92
95
  auto structure_string = structure_val.GetValueUnsafe<string_t>();
93
96
  JSONAllocator json_allocator(Allocator::DefaultAllocator());
@@ -251,7 +254,10 @@ static bool TransformDecimal(yyjson_val *vals[], Vector &result, const idx_t cou
251
254
 
252
255
  bool JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target,
253
256
  Vector &string_vector, JSONTransformOptions &options) {
254
- auto data = (string_t *)FlatVector::GetData(string_vector);
257
+ if (count > STANDARD_VECTOR_SIZE) {
258
+ string_vector.Initialize(false, count);
259
+ }
260
+ auto data = FlatVector::GetData<string_t>(string_vector);
255
261
  auto &validity = FlatVector::Validity(string_vector);
256
262
  validity.SetAllValid(count);
257
263
 
@@ -173,6 +173,7 @@ unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context,
173
173
  lower_name = lower_name.substr(0, lower_name.size() - 4);
174
174
  }
175
175
  if (!StringUtil::EndsWith(lower_name, ".json") && !StringUtil::Contains(lower_name, ".json?") &&
176
+ !StringUtil::EndsWith(lower_name, ".jsonl") && !StringUtil::Contains(lower_name, ".jsonl?") &&
176
177
  !StringUtil::EndsWith(lower_name, ".ndjson") && !StringUtil::Contains(lower_name, ".ndjson?")) {
177
178
  return nullptr;
178
179
  }
@@ -4,6 +4,7 @@
4
4
  #include "duckdb/common/exception.hpp"
5
5
  #include "duckdb/common/types/value.hpp"
6
6
  #include "duckdb/common/case_insensitive_map.hpp"
7
+ #include <numeric>
7
8
 
8
9
  namespace duckdb {
9
10
 
@@ -64,4 +65,58 @@ vector<bool> ParseColumnList(const Value &value, vector<string> &names, const st
64
65
  return ParseColumnList(children, names, loption);
65
66
  }
66
67
 
68
+ vector<idx_t> ParseColumnsOrdered(const vector<Value> &set, vector<string> &names, const string &loption) {
69
+ vector<idx_t> result;
70
+
71
+ if (set.empty()) {
72
+ throw BinderException("\"%s\" expects a column list or * as parameter", loption);
73
+ }
74
+
75
+ // Maps option to bool indicating if its found and the index in the original set
76
+ case_insensitive_map_t<std::pair<bool, idx_t>> option_map;
77
+ for (idx_t i = 0; i < set.size(); i++) {
78
+ option_map[set[i].ToString()] = {false, i};
79
+ }
80
+ result.resize(option_map.size());
81
+
82
+ for (idx_t i = 0; i < names.size(); i++) {
83
+ auto entry = option_map.find(names[i]);
84
+ if (entry != option_map.end()) {
85
+ result[entry->second.second] = i;
86
+ entry->second.first = true;
87
+ }
88
+ }
89
+ for (auto &entry : option_map) {
90
+ if (!entry.second.first) {
91
+ throw BinderException("\"%s\" expected to find %s, but it was not found in the table", loption,
92
+ entry.first.c_str());
93
+ }
94
+ }
95
+ return result;
96
+ }
97
+
98
+ vector<idx_t> ParseColumnsOrdered(const Value &value, vector<string> &names, const string &loption) {
99
+ vector<idx_t> result;
100
+
101
+ // Only accept a list of arguments
102
+ if (value.type().id() != LogicalTypeId::LIST) {
103
+ // Support a single argument if it's '*'
104
+ if (value.type().id() == LogicalTypeId::VARCHAR && value.GetValue<string>() == "*") {
105
+ result.resize(names.size(), 0);
106
+ std::iota(std::begin(result), std::end(result), 0);
107
+ return result;
108
+ }
109
+ throw BinderException("\"%s\" expects a column list or * as parameter", loption);
110
+ }
111
+ auto &children = ListValue::GetChildren(value);
112
+ // accept '*' as single argument
113
+ if (children.size() == 1 && children[0].type().id() == LogicalTypeId::VARCHAR &&
114
+ children[0].GetValue<string>() == "*") {
115
+ result.resize(names.size(), 0);
116
+ std::iota(std::begin(result), std::end(result), 0);
117
+ return result;
118
+ }
119
+ return ParseColumnsOrdered(children, names, loption);
120
+ }
121
+
67
122
  } // namespace duckdb
@@ -424,7 +424,7 @@ string LogicalType::ToString() const {
424
424
  auto &child_types = StructType::GetChildTypes(*this);
425
425
  string ret = "STRUCT(";
426
426
  for (size_t i = 0; i < child_types.size(); i++) {
427
- ret += child_types[i].first + " " + child_types[i].second.ToString();
427
+ ret += KeywordHelper::WriteOptionallyQuoted(child_types[i].first) + " " + child_types[i].second.ToString();
428
428
  if (i < child_types.size() - 1) {
429
429
  ret += ", ";
430
430
  }
@@ -252,15 +252,15 @@ OperatorResultType CachingPhysicalOperator::Execute(ExecutionContext &context, D
252
252
  if (!state.initialized) {
253
253
  state.initialized = true;
254
254
  state.can_cache_chunk = true;
255
+
255
256
  if (!context.pipeline || !caching_supported) {
256
257
  state.can_cache_chunk = false;
257
- }
258
-
259
- if (context.pipeline->GetSink() && context.pipeline->GetSink()->RequiresBatchIndex()) {
258
+ } else if (!context.pipeline->GetSink()) {
259
+ // Disabling for pipelines without Sink, i.e. when pulling
260
260
  state.can_cache_chunk = false;
261
- }
262
-
263
- if (context.pipeline->IsOrderDependent()) {
261
+ } else if (context.pipeline->GetSink()->RequiresBatchIndex()) {
262
+ state.can_cache_chunk = false;
263
+ } else if (context.pipeline->IsOrderDependent()) {
264
264
  state.can_cache_chunk = false;
265
265
  }
266
266
  }
@@ -0,0 +1,59 @@
1
+ #include "duckdb/function/table/system_functions.hpp"
2
+ #include "duckdb/storage/buffer_manager.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ struct DuckDBTemporaryFilesData : public GlobalTableFunctionState {
7
+ DuckDBTemporaryFilesData() : offset(0) {
8
+ }
9
+
10
+ vector<TemporaryFileInformation> entries;
11
+ idx_t offset;
12
+ };
13
+
14
+ static unique_ptr<FunctionData> DuckDBTemporaryFilesBind(ClientContext &context, TableFunctionBindInput &input,
15
+ vector<LogicalType> &return_types, vector<string> &names) {
16
+ names.emplace_back("path");
17
+ return_types.emplace_back(LogicalType::VARCHAR);
18
+
19
+ names.emplace_back("size");
20
+ return_types.emplace_back(LogicalType::BIGINT);
21
+
22
+ return nullptr;
23
+ }
24
+
25
+ unique_ptr<GlobalTableFunctionState> DuckDBTemporaryFilesInit(ClientContext &context, TableFunctionInitInput &input) {
26
+ auto result = make_unique<DuckDBTemporaryFilesData>();
27
+
28
+ result->entries = BufferManager::GetBufferManager(context).GetTemporaryFiles();
29
+ return std::move(result);
30
+ }
31
+
32
+ void DuckDBTemporaryFilesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
33
+ auto &data = (DuckDBTemporaryFilesData &)*data_p.global_state;
34
+ if (data.offset >= data.entries.size()) {
35
+ // finished returning values
36
+ return;
37
+ }
38
+ // start returning values
39
+ // either fill up the chunk or return all the remaining columns
40
+ idx_t count = 0;
41
+ while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) {
42
+ auto &entry = data.entries[data.offset++];
43
+ // return values:
44
+ idx_t col = 0;
45
+ // database_name, VARCHAR
46
+ output.SetValue(col++, count, entry.path);
47
+ // database_oid, BIGINT
48
+ output.SetValue(col++, count, Value::BIGINT(entry.size));
49
+ count++;
50
+ }
51
+ output.SetCardinality(count);
52
+ }
53
+
54
+ void DuckDBTemporaryFilesFun::RegisterFunction(BuiltinFunctions &set) {
55
+ set.AddFunction(TableFunction("duckdb_temporary_files", {}, DuckDBTemporaryFilesFunction, DuckDBTemporaryFilesBind,
56
+ DuckDBTemporaryFilesInit));
57
+ }
58
+
59
+ } // namespace duckdb
@@ -29,6 +29,7 @@ void BuiltinFunctions::RegisterSQLiteFunctions() {
29
29
  DuckDBSequencesFun::RegisterFunction(*this);
30
30
  DuckDBSettingsFun::RegisterFunction(*this);
31
31
  DuckDBTablesFun::RegisterFunction(*this);
32
+ DuckDBTemporaryFilesFun::RegisterFunction(*this);
32
33
  DuckDBTypesFun::RegisterFunction(*this);
33
34
  DuckDBViewsFun::RegisterFunction(*this);
34
35
  TestAllTypesFun::RegisterFunction(*this);
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.1-dev341"
2
+ #define DUCKDB_VERSION "0.7.1-dev407"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "d58ab188ff"
5
+ #define DUCKDB_SOURCE_ID "66ba97b5f9"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -17,5 +17,7 @@ class Value;
17
17
  Value ConvertVectorToValue(vector<Value> set);
18
18
  vector<bool> ParseColumnList(const vector<Value> &set, vector<string> &names, const string &option_name);
19
19
  vector<bool> ParseColumnList(const Value &value, vector<string> &names, const string &option_name);
20
+ vector<idx_t> ParseColumnsOrdered(const vector<Value> &set, vector<string> &names, const string &loption);
21
+ vector<idx_t> ParseColumnsOrdered(const Value &value, vector<string> &names, const string &loption);
20
22
 
21
23
  } // namespace duckdb
@@ -89,6 +89,10 @@ struct DuckDBTablesFun {
89
89
  static void RegisterFunction(BuiltinFunctions &set);
90
90
  };
91
91
 
92
+ struct DuckDBTemporaryFilesFun {
93
+ static void RegisterFunction(BuiltinFunctions &set);
94
+ };
95
+
92
96
  struct DuckDBTypesFun {
93
97
  static void RegisterFunction(BuiltinFunctions &set);
94
98
  };
@@ -136,6 +136,8 @@ struct DBConfigOptions {
136
136
  case_insensitive_map_t<Value> set_variables;
137
137
  //! Database configuration variable default values;
138
138
  case_insensitive_map_t<Value> set_variable_defaults;
139
+ //! Directory to store extension binaries in
140
+ string extension_directory;
139
141
  //! Whether unsigned extensions should be loaded
140
142
  bool allow_unsigned_extensions = false;
141
143
  //! Enable emitting FSST Vectors
@@ -216,6 +216,15 @@ struct ExplainOutputSetting {
216
216
  static Value GetSetting(ClientContext &context);
217
217
  };
218
218
 
219
+ struct ExtensionDirectorySetting {
220
+ static constexpr const char *Name = "extension_directory";
221
+ static constexpr const char *Description = "Set the directory to store extensions in";
222
+ static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR;
223
+ static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
224
+ static void ResetGlobal(DatabaseInstance *db, DBConfig &config);
225
+ static Value GetSetting(ClientContext &context);
226
+ };
227
+
219
228
  struct ExternalThreadsSetting {
220
229
  static constexpr const char *Name = "external_threads";
221
230
  static constexpr const char *Description = "The number of external threads that work on DuckDB tasks.";
@@ -69,13 +69,6 @@ private:
69
69
  //! The final chunk used for moving data into the sink
70
70
  DataChunk final_chunk;
71
71
 
72
- //! Indicates that the first non-finished operator in the pipeline with RequireFinalExecute has some pending result
73
- bool pending_final_execute = false;
74
- //! The OperatorFinalizeResultType corresponding to the currently pending final_execute result
75
- OperatorFinalizeResultType cached_final_execute_result;
76
- //! Source has been exhausted
77
- bool source_empty = false;
78
-
79
72
  //! The operators that are not yet finished executing and have data remaining
80
73
  //! If the stack of in_process_operators is empty, we fetch from the source instead
81
74
  stack<idx_t> in_process_operators;
@@ -23,6 +23,11 @@ class DatabaseInstance;
23
23
  class TemporaryDirectoryHandle;
24
24
  struct EvictionQueue;
25
25
 
26
+ struct TemporaryFileInformation {
27
+ string path;
28
+ idx_t size;
29
+ };
30
+
26
31
  //! The buffer manager is in charge of handling memory management for the database. It hands out memory buffers that can
27
32
  //! be used by the database internally.
28
33
  //
@@ -98,6 +103,9 @@ public:
98
103
  DUCKDB_API void ReserveMemory(idx_t size);
99
104
  DUCKDB_API void FreeReservedMemory(idx_t size);
100
105
 
106
+ //! Returns a list of all temporary files
107
+ vector<TemporaryFileInformation> GetTemporaryFiles();
108
+
101
109
  private:
102
110
  //! Register an in-memory buffer of arbitrary size, as long as it is >= BLOCK_SIZE. can_destroy signifies whether or
103
111
  //! not the buffer can be destroyed when unpinned, or whether or not it needs to be written to a temporary file so
@@ -67,6 +67,7 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
67
67
  DUCKDB_LOCAL(EnableProgressBarPrintSetting),
68
68
  DUCKDB_GLOBAL(ExperimentalParallelCSVSetting),
69
69
  DUCKDB_LOCAL(ExplainOutputSetting),
70
+ DUCKDB_GLOBAL(ExtensionDirectorySetting),
70
71
  DUCKDB_GLOBAL(ExternalThreadsSetting),
71
72
  DUCKDB_LOCAL(FileSearchPathSetting),
72
73
  DUCKDB_GLOBAL(ForceCompressionSetting),
@@ -144,9 +144,15 @@ unique_ptr<AttachedDatabase> DatabaseInstance::CreateAttachedDatabase(AttachInfo
144
144
  if (entry == config.storage_extensions.end()) {
145
145
  throw BinderException("Unrecognized storage type \"%s\"", type);
146
146
  }
147
- // use storage extension to create the initial database
148
- attached_database = make_unique<AttachedDatabase>(*this, Catalog::GetSystemCatalog(*this), *entry->second,
149
- info.name, info, access_mode);
147
+
148
+ if (entry->second->attach != nullptr && entry->second->create_transaction_manager != nullptr) {
149
+ // use storage extension to create the initial database
150
+ attached_database = make_unique<AttachedDatabase>(*this, Catalog::GetSystemCatalog(*this), *entry->second,
151
+ info.name, info, access_mode);
152
+ } else {
153
+ attached_database = make_unique<AttachedDatabase>(*this, Catalog::GetSystemCatalog(*this), info.name,
154
+ info.path, access_mode);
155
+ }
150
156
  } else {
151
157
  // check if this is an in-memory database or not
152
158
  attached_database =
@@ -200,6 +206,7 @@ void DatabaseInstance::Initialize(const char *database_path, DBConfig *user_conf
200
206
  AttachInfo info;
201
207
  info.name = AttachedDatabase::ExtractDatabaseName(config.options.database_path);
202
208
  info.path = config.options.database_path;
209
+
203
210
  auto attached_database = CreateAttachedDatabase(info, database_type, config.options.access_mode);
204
211
  auto initial_database = attached_database.get();
205
212
  {
@@ -40,20 +40,54 @@ const vector<string> ExtensionHelper::PathComponents() {
40
40
 
41
41
  string ExtensionHelper::ExtensionDirectory(ClientContext &context) {
42
42
  auto &fs = FileSystem::GetFileSystem(context);
43
- string local_path = fs.GetHomeDirectory(FileSystem::GetFileOpener(context));
44
- if (!fs.DirectoryExists(local_path)) {
45
- throw IOException("Can't find the home directory at '%s'\nSpecify a home directory using the SET "
46
- "home_directory='/path/to/dir' option.",
47
- local_path);
43
+ auto opener = FileSystem::GetFileOpener(context);
44
+ Value extension_directory_value;
45
+ string extension_directory;
46
+
47
+ if (context.TryGetCurrentSetting("extension_directory", extension_directory_value) &&
48
+ !extension_directory_value.IsNull() &&
49
+ !extension_directory_value.ToString().empty()) { // create the extension directory if not present
50
+ extension_directory = extension_directory_value.ToString();
51
+ // TODO this should probably live in the FileSystem
52
+ // convert random separators to platform-canonic
53
+ extension_directory = fs.ConvertSeparators(extension_directory);
54
+ // expand ~ in extension directory
55
+ extension_directory = fs.ExpandPath(extension_directory, opener);
56
+ if (!fs.DirectoryExists(extension_directory)) {
57
+ auto sep = fs.PathSeparator();
58
+ auto splits = StringUtil::Split(extension_directory, sep);
59
+ D_ASSERT(!splits.empty());
60
+ string extension_directory_prefix;
61
+ if (StringUtil::StartsWith(extension_directory, sep)) {
62
+ extension_directory_prefix = sep; // this is swallowed by Split otherwise
63
+ }
64
+ for (auto &split : splits) {
65
+ extension_directory_prefix = extension_directory_prefix + split + sep;
66
+ if (!fs.DirectoryExists(extension_directory_prefix)) {
67
+ fs.CreateDirectory(extension_directory_prefix);
68
+ }
69
+ }
70
+ }
71
+ } else { // otherwise default to home
72
+ string home_directory = fs.GetHomeDirectory(opener);
73
+ // exception if the home directory does not exist, don't create whatever we think is home
74
+ if (!fs.DirectoryExists(home_directory)) {
75
+ throw IOException("Can't find the home directory at '%s'\nSpecify a home directory using the SET "
76
+ "home_directory='/path/to/dir' option.",
77
+ home_directory);
78
+ }
79
+ extension_directory = home_directory;
48
80
  }
81
+ D_ASSERT(fs.DirectoryExists(extension_directory));
82
+
49
83
  auto path_components = PathComponents();
50
84
  for (auto &path_ele : path_components) {
51
- local_path = fs.JoinPath(local_path, path_ele);
52
- if (!fs.DirectoryExists(local_path)) {
53
- fs.CreateDirectory(local_path);
85
+ extension_directory = fs.JoinPath(extension_directory, path_ele);
86
+ if (!fs.DirectoryExists(extension_directory)) {
87
+ fs.CreateDirectory(extension_directory);
54
88
  }
55
89
  }
56
- return local_path;
90
+ return extension_directory;
57
91
  }
58
92
 
59
93
  bool ExtensionHelper::CreateSuggestions(const string &extension_name, string &message) {
@@ -32,7 +32,13 @@ ExtensionInitResult ExtensionHelper::InitialLoad(DBConfig &config, FileOpener *o
32
32
 
33
33
  // shorthand case
34
34
  if (!ExtensionHelper::IsFullPath(extension)) {
35
- string local_path = fs.GetHomeDirectory(opener);
35
+ string local_path = !config.options.extension_directory.empty() ? config.options.extension_directory
36
+ : fs.GetHomeDirectory(opener);
37
+
38
+ // convert random separators to platform-canonic
39
+ local_path = fs.ConvertSeparators(local_path);
40
+ // expand ~ in extension directory
41
+ local_path = fs.ExpandPath(local_path, opener);
36
42
  auto path_components = PathComponents();
37
43
  for (auto &path_ele : path_components) {
38
44
  local_path = fs.JoinPath(local_path, path_ele);
@@ -40,7 +46,6 @@ ExtensionInitResult ExtensionHelper::InitialLoad(DBConfig &config, FileOpener *o
40
46
  string extension_name = ApplyExtensionAlias(extension);
41
47
  filename = fs.JoinPath(local_path, extension_name + ".duckdb_extension");
42
48
  }
43
-
44
49
  if (!fs.FileExists(filename)) {
45
50
  string message;
46
51
  bool exact_match = ExtensionHelper::CreateSuggestions(extension, message);
@@ -531,6 +531,22 @@ Value ExplainOutputSetting::GetSetting(ClientContext &context) {
531
531
  }
532
532
  }
533
533
 
534
+ //===--------------------------------------------------------------------===//
535
+ // Extension Directory Setting
536
+ //===--------------------------------------------------------------------===//
537
+ void ExtensionDirectorySetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
538
+ auto new_directory = input.ToString();
539
+ config.options.extension_directory = input.ToString();
540
+ }
541
+
542
+ void ExtensionDirectorySetting::ResetGlobal(DatabaseInstance *db, DBConfig &config) {
543
+ config.options.extension_directory = DBConfig().options.extension_directory;
544
+ }
545
+
546
+ Value ExtensionDirectorySetting::GetSetting(ClientContext &context) {
547
+ return Value(DBConfig::GetConfig(context).options.extension_directory);
548
+ }
549
+
534
550
  //===--------------------------------------------------------------------===//
535
551
  // External Threads Setting
536
552
  //===--------------------------------------------------------------------===//
@@ -114,52 +114,6 @@ OperatorResultType PipelineExecutor::ExecutePushInternal(DataChunk &input, idx_t
114
114
  }
115
115
  }
116
116
 
117
- // Pull a single DataChunk from the pipeline by flushing any operators holding cached output
118
- void PipelineExecutor::FlushCachingOperatorsPull(DataChunk &result) {
119
- idx_t start_idx = IsFinished() ? idx_t(finished_processing_idx) : 0;
120
- idx_t op_idx = start_idx;
121
- while (op_idx < pipeline.operators.size()) {
122
- if (!pipeline.operators[op_idx]->RequiresFinalExecute()) {
123
- op_idx++;
124
- continue;
125
- }
126
-
127
- OperatorFinalizeResultType finalize_result;
128
- DataChunk &curr_chunk =
129
- op_idx + 1 >= intermediate_chunks.size() ? final_chunk : *intermediate_chunks[op_idx + 1];
130
-
131
- if (pending_final_execute) {
132
- // Still have a cached chunk from a last pull, reuse chunk
133
- finalize_result = cached_final_execute_result;
134
- } else {
135
- // Flush the current operator
136
- auto current_operator = pipeline.operators[op_idx];
137
- StartOperator(current_operator);
138
- finalize_result = current_operator->FinalExecute(context, curr_chunk, *current_operator->op_state,
139
- *intermediate_states[op_idx]);
140
- EndOperator(current_operator, &curr_chunk);
141
- }
142
-
143
- auto execute_result = Execute(curr_chunk, result, op_idx + 1);
144
-
145
- if (execute_result == OperatorResultType::HAVE_MORE_OUTPUT) {
146
- pending_final_execute = true;
147
- cached_final_execute_result = finalize_result;
148
- } else {
149
- pending_final_execute = false;
150
- if (finalize_result == OperatorFinalizeResultType::FINISHED) {
151
- FinishProcessing(op_idx);
152
- op_idx++;
153
- }
154
- }
155
-
156
- // Some non-empty result was pulled from some caching operator, we're done for this pull
157
- if (result.size() > 0) {
158
- break;
159
- }
160
- }
161
- }
162
-
163
117
  // Push all remaining cached operator output through the pipeline
164
118
  void PipelineExecutor::FlushCachingOperatorsPush() {
165
119
  idx_t start_idx = IsFinished() ? idx_t(finished_processing_idx) : 0;
@@ -223,21 +177,13 @@ void PipelineExecutor::ExecutePull(DataChunk &result) {
223
177
  D_ASSERT(!pipeline.sink);
224
178
  auto &source_chunk = pipeline.operators.empty() ? result : *intermediate_chunks[0];
225
179
  while (result.size() == 0) {
226
- if (source_empty) {
227
- FlushCachingOperatorsPull(result);
228
- break;
229
- }
230
-
231
180
  if (in_process_operators.empty()) {
232
181
  source_chunk.Reset();
233
182
  FetchFromSource(source_chunk);
234
-
235
183
  if (source_chunk.size() == 0) {
236
- source_empty = true;
237
- continue;
184
+ break;
238
185
  }
239
186
  }
240
-
241
187
  if (!pipeline.operators.empty()) {
242
188
  auto state = Execute(source_chunk, result);
243
189
  if (state == OperatorResultType::FINISHED) {
@@ -23,16 +23,6 @@
23
23
 
24
24
  namespace duckdb {
25
25
 
26
- static vector<idx_t> ColumnListToIndices(const vector<bool> &vec) {
27
- vector<idx_t> ret;
28
- for (idx_t i = 0; i < vec.size(); i++) {
29
- if (vec[i]) {
30
- ret.push_back(i);
31
- }
32
- }
33
- return ret;
34
- }
35
-
36
26
  vector<string> GetUniqueNames(const vector<string> &original_names) {
37
27
  unordered_set<string> name_set;
38
28
  vector<string> unique_names;
@@ -115,7 +105,7 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt) {
115
105
  }
116
106
  if (loption == "partition_by") {
117
107
  auto converted = ConvertVectorToValue(std::move(option.second));
118
- partition_cols = ColumnListToIndices(ParseColumnList(converted, select_node.names, loption));
108
+ partition_cols = ParseColumnsOrdered(converted, select_node.names, loption);
119
109
  continue;
120
110
  }
121
111
  stmt.info->options[option.first] = option.second;
@@ -62,7 +62,7 @@ BlockHandle::BlockHandle(BlockManager &block_manager, block_id_t block_id_p, uni
62
62
  memory_charge = std::move(reservation);
63
63
  }
64
64
 
65
- BlockHandle::~BlockHandle() {
65
+ BlockHandle::~BlockHandle() { // NOLINT: allow internal exceptions
66
66
  // being destroyed, so any unswizzled pointers are just binary junk now.
67
67
  unswizzled = nullptr;
68
68
  auto &buffer_manager = block_manager.buffer_manager;
@@ -522,11 +522,8 @@ void BufferManager::PurgeQueue() {
522
522
 
523
523
  void BlockManager::UnregisterBlock(block_id_t block_id, bool can_destroy) {
524
524
  if (block_id >= MAXIMUM_BLOCK) {
525
- // in-memory buffer: destroy the buffer
526
- if (!can_destroy) {
527
- // buffer could have been offloaded to disk: remove the file
528
- buffer_manager.DeleteTemporaryFile(block_id);
529
- }
525
+ // in-memory buffer: buffer could have been offloaded to disk: remove the file
526
+ buffer_manager.DeleteTemporaryFile(block_id);
530
527
  } else {
531
528
  lock_guard<mutex> lock(blocks_lock);
532
529
  // on-disk block: erase from list of blocks in manager
@@ -608,7 +605,11 @@ public:
608
605
  //! Returns true if the max_index has been altered
609
606
  bool RemoveIndex(idx_t index) {
610
607
  // remove this block from the set of blocks
611
- indexes_in_use.erase(index);
608
+ auto entry = indexes_in_use.find(index);
609
+ if (entry == indexes_in_use.end()) {
610
+ throw InternalException("RemoveIndex - index %llu not found in indexes_in_use", index);
611
+ }
612
+ indexes_in_use.erase(entry);
612
613
  free_indexes.insert(index);
613
614
  // check if we can truncate the file
614
615
 
@@ -617,7 +618,7 @@ public:
617
618
  if (max_index_in_use < max_index) {
618
619
  // max index in use is lower than the max_index
619
620
  // reduce the max_index
620
- max_index = max_index_in_use + 1;
621
+ max_index = indexes_in_use.empty() ? 0 : max_index_in_use + 1;
621
622
  // we can remove any free_indexes that are larger than the current max_index
622
623
  while (!free_indexes.empty()) {
623
624
  auto max_entry = *free_indexes.rbegin();
@@ -693,16 +694,15 @@ public:
693
694
 
694
695
  unique_ptr<FileBuffer> ReadTemporaryBuffer(block_id_t id, idx_t block_index,
695
696
  unique_ptr<FileBuffer> reusable_buffer) {
696
- auto buffer =
697
- ReadTemporaryBufferInternal(BufferManager::GetBufferManager(db), *handle, GetPositionInFile(block_index),
698
- Storage::BLOCK_SIZE, id, std::move(reusable_buffer));
699
- {
700
- // remove the block (and potentially truncate the temp file)
701
- TemporaryFileLock lock(file_lock);
702
- D_ASSERT(handle);
703
- RemoveTempBlockIndex(lock, block_index);
704
- }
705
- return buffer;
697
+ return ReadTemporaryBufferInternal(BufferManager::GetBufferManager(db), *handle, GetPositionInFile(block_index),
698
+ Storage::BLOCK_SIZE, id, std::move(reusable_buffer));
699
+ }
700
+
701
+ void EraseBlockIndex(block_id_t block_index) {
702
+ // remove the block (and potentially truncate the temp file)
703
+ TemporaryFileLock lock(file_lock);
704
+ D_ASSERT(handle);
705
+ RemoveTempBlockIndex(lock, block_index);
706
706
  }
707
707
 
708
708
  bool DeleteIfEmpty() {
@@ -718,6 +718,14 @@ public:
718
718
  return true;
719
719
  }
720
720
 
721
+ TemporaryFileInformation GetTemporaryFile() {
722
+ TemporaryFileLock lock(file_lock);
723
+ TemporaryFileInformation info;
724
+ info.path = path;
725
+ info.size = GetPositionInFile(index_manager.GetMaxIndex());
726
+ return info;
727
+ }
728
+
721
729
  private:
722
730
  void CreateFileIfNotExists(TemporaryFileLock &) {
723
731
  if (handle) {
@@ -818,7 +826,7 @@ public:
818
826
  {
819
827
  // remove the block (and potentially erase the temp file)
820
828
  TemporaryManagerLock lock(manager_lock);
821
- EraseUsedBlock(lock, id, handle, index.file_index);
829
+ EraseUsedBlock(lock, id, handle, index);
822
830
  }
823
831
  return buffer;
824
832
  }
@@ -827,14 +835,29 @@ public:
827
835
  TemporaryManagerLock lock(manager_lock);
828
836
  auto index = GetTempBlockIndex(lock, id);
829
837
  auto handle = GetFileHandle(lock, index.file_index);
830
- EraseUsedBlock(lock, id, handle, index.file_index);
838
+ EraseUsedBlock(lock, id, handle, index);
839
+ }
840
+
841
+ vector<TemporaryFileInformation> GetTemporaryFiles() {
842
+ lock_guard<mutex> lock(manager_lock);
843
+ vector<TemporaryFileInformation> result;
844
+ for (auto &file : files) {
845
+ result.push_back(file.second->GetTemporaryFile());
846
+ }
847
+ return result;
831
848
  }
832
849
 
833
850
  private:
834
- void EraseUsedBlock(TemporaryManagerLock &lock, block_id_t id, TemporaryFileHandle *handle, idx_t file_index) {
835
- used_blocks.erase(id);
851
+ void EraseUsedBlock(TemporaryManagerLock &lock, block_id_t id, TemporaryFileHandle *handle,
852
+ TemporaryFileIndex index) {
853
+ auto entry = used_blocks.find(id);
854
+ if (entry == used_blocks.end()) {
855
+ throw InternalException("EraseUsedBlock - Block %llu not found in used blocks", id);
856
+ }
857
+ used_blocks.erase(entry);
858
+ handle->EraseBlockIndex(index.block_index);
836
859
  if (handle->DeleteIfEmpty()) {
837
- EraseFileHandle(lock, file_index);
860
+ EraseFileHandle(lock, index.file_index);
838
861
  }
839
862
  }
840
863
 
@@ -992,6 +1015,35 @@ void BufferManager::DeleteTemporaryFile(block_id_t id) {
992
1015
  }
993
1016
  }
994
1017
 
1018
+ vector<TemporaryFileInformation> BufferManager::GetTemporaryFiles() {
1019
+ vector<TemporaryFileInformation> result;
1020
+ if (temp_directory.empty()) {
1021
+ return result;
1022
+ }
1023
+ {
1024
+ lock_guard<mutex> temp_handle_guard(temp_handle_lock);
1025
+ if (temp_directory_handle) {
1026
+ result = temp_directory_handle->GetTempFile().GetTemporaryFiles();
1027
+ }
1028
+ }
1029
+ auto &fs = FileSystem::GetFileSystem(db);
1030
+ fs.ListFiles(temp_directory, [&](const string &name, bool is_dir) {
1031
+ if (is_dir) {
1032
+ return;
1033
+ }
1034
+ if (!StringUtil::EndsWith(name, ".block")) {
1035
+ return;
1036
+ }
1037
+ TemporaryFileInformation info;
1038
+ info.path = name;
1039
+ auto handle = fs.OpenFile(name, FileFlags::FILE_FLAGS_READ);
1040
+ info.size = fs.GetFileSize(*handle);
1041
+ handle.reset();
1042
+ result.push_back(info);
1043
+ });
1044
+ return result;
1045
+ }
1046
+
995
1047
  string BufferManager::InMemoryWarning() {
996
1048
  if (!temp_directory.empty()) {
997
1049
  return "";
@@ -22,6 +22,8 @@
22
22
 
23
23
  #include "src/function/table/system/duckdb_tables.cpp"
24
24
 
25
+ #include "src/function/table/system/duckdb_temporary_files.cpp"
26
+
25
27
  #include "src/function/table/system/duckdb_types.cpp"
26
28
 
27
29
  #include "src/function/table/system/duckdb_views.cpp"