duckdb 0.8.2-dev3949.0 → 0.8.2-dev4025.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +7 -7
- package/duckdb_extension_config.cmake +1 -1
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/parquet_extension.cpp +23 -13
- package/src/duckdb/src/common/crypto/md5.cpp +2 -12
- package/src/duckdb/src/common/file_buffer.cpp +1 -1
- package/src/duckdb/src/core_functions/function_list.cpp +1 -0
- package/src/duckdb/src/core_functions/scalar/string/sha256.cpp +32 -0
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +7 -2
- package/src/duckdb/src/function/scalar/string/suffix.cpp +1 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +9 -0
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +7 -3
- package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +1 -1
- package/src/duckdb/src/main/database.cpp +1 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +15 -1
- package/src/duckdb/src/main/extension/extension_install.cpp +30 -17
- package/src/duckdb/src/main/extension/extension_load.cpp +36 -7
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +10 -8
- package/src/duckdb/third_party/mbedtls/include/mbedtls_wrapper.hpp +4 -1
- package/src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp +24 -2
- package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +5 -5
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
package/binding.gyp
CHANGED
@@ -249,18 +249,18 @@
|
|
249
249
|
"src/duckdb/third_party/zstd/compress/zstd_lazy.cpp",
|
250
250
|
"src/duckdb/third_party/zstd/compress/zstd_ldm.cpp",
|
251
251
|
"src/duckdb/third_party/zstd/compress/zstd_opt.cpp",
|
252
|
-
"src/duckdb/extension/icu/./icu-
|
252
|
+
"src/duckdb/extension/icu/./icu-timezone.cpp",
|
253
253
|
"src/duckdb/extension/icu/./icu-datepart.cpp",
|
254
|
-
"src/duckdb/extension/icu/./icu-datesub.cpp",
|
255
|
-
"src/duckdb/extension/icu/./icu-table-range.cpp",
|
256
|
-
"src/duckdb/extension/icu/./icu-datetrunc.cpp",
|
257
254
|
"src/duckdb/extension/icu/./icu-timebucket.cpp",
|
258
|
-
"src/duckdb/extension/icu/./icu-
|
255
|
+
"src/duckdb/extension/icu/./icu-datesub.cpp",
|
259
256
|
"src/duckdb/extension/icu/./icu-list-range.cpp",
|
260
|
-
"src/duckdb/extension/icu/./icu-
|
257
|
+
"src/duckdb/extension/icu/./icu-makedate.cpp",
|
261
258
|
"src/duckdb/extension/icu/./icu-datefunc.cpp",
|
259
|
+
"src/duckdb/extension/icu/./icu-datetrunc.cpp",
|
260
|
+
"src/duckdb/extension/icu/./icu-dateadd.cpp",
|
261
|
+
"src/duckdb/extension/icu/./icu-table-range.cpp",
|
262
262
|
"src/duckdb/extension/icu/./icu_extension.cpp",
|
263
|
-
"src/duckdb/extension/icu/./icu-
|
263
|
+
"src/duckdb/extension/icu/./icu-strptime.cpp",
|
264
264
|
"src/duckdb/ub_extension_icu_third_party_icu_common.cpp",
|
265
265
|
"src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
|
266
266
|
"src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",
|
package/duckdb_extension_config.cmake
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
#
|
5
5
|
# This is the default extension configuration for NodeJS builds. Basically it means that all these extensions are
|
6
6
|
# "baked in" to the NodeJS binaries Note that the configuration here is only when building Node using the main
|
7
|
-
# CMakeLists.txt file with the `
|
7
|
+
# CMakeLists.txt file with the `BUILD_NODE` variable.
|
8
8
|
# TODO: unify this by making setup.py also use this configuration, making this the config for all Node builds
|
9
9
|
duckdb_extension_load(json)
|
10
10
|
duckdb_extension_load(icu)
|
package/package.json
CHANGED
package/src/duckdb/extension/parquet/parquet_extension.cpp
CHANGED
@@ -21,6 +21,8 @@
|
|
21
21
|
#include "duckdb/common/field_writer.hpp"
|
22
22
|
#include "duckdb/common/file_system.hpp"
|
23
23
|
#include "duckdb/common/multi_file_reader.hpp"
|
24
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
25
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
24
26
|
#include "duckdb/common/types/chunk_collection.hpp"
|
25
27
|
#include "duckdb/function/copy_function.hpp"
|
26
28
|
#include "duckdb/function/table_function.hpp"
|
@@ -35,8 +37,6 @@
|
|
35
37
|
#include "duckdb/planner/operator/logical_get.hpp"
|
36
38
|
#include "duckdb/storage/statistics/base_statistics.hpp"
|
37
39
|
#include "duckdb/storage/table/row_group.hpp"
|
38
|
-
#include "duckdb/common/serializer/format_serializer.hpp"
|
39
|
-
#include "duckdb/common/serializer/format_deserializer.hpp"
|
40
40
|
|
41
41
|
#endif
|
42
42
|
|
@@ -78,6 +78,8 @@ struct ParquetReadLocalState : public LocalTableFunctionState {
|
|
78
78
|
DataChunk all_columns;
|
79
79
|
};
|
80
80
|
|
81
|
+
enum class ParquetFileState : uint8_t { UNOPENED, OPENING, OPEN, CLOSED };
|
82
|
+
|
81
83
|
struct ParquetReadGlobalState : public GlobalTableFunctionState {
|
82
84
|
mutex lock;
|
83
85
|
|
@@ -86,7 +88,7 @@ struct ParquetReadGlobalState : public GlobalTableFunctionState {
|
|
86
88
|
//! Currently opened readers
|
87
89
|
vector<shared_ptr<ParquetReader>> readers;
|
88
90
|
//! Flag to indicate a file is being opened
|
89
|
-
vector<
|
91
|
+
vector<ParquetFileState> file_states;
|
90
92
|
//! Mutexes to wait for a file that is currently being opened
|
91
93
|
unique_ptr<mutex[]> file_mutexes;
|
92
94
|
//! Signal to other threads that a file failed to open, letting every thread abort.
|
@@ -359,7 +361,7 @@ public:
|
|
359
361
|
auto &bind_data = input.bind_data->CastNoConst<ParquetReadBindData>();
|
360
362
|
auto result = make_uniq<ParquetReadGlobalState>();
|
361
363
|
|
362
|
-
result->
|
364
|
+
result->file_states = vector<ParquetFileState>(bind_data.files.size(), ParquetFileState::UNOPENED);
|
363
365
|
result->file_mutexes = unique_ptr<mutex[]>(new mutex[bind_data.files.size()]);
|
364
366
|
if (bind_data.files.empty()) {
|
365
367
|
result->initial_reader = nullptr;
|
@@ -367,6 +369,8 @@ public:
|
|
367
369
|
result->readers = std::move(bind_data.union_readers);
|
368
370
|
if (result->readers.size() != bind_data.files.size()) {
|
369
371
|
result->readers = vector<shared_ptr<ParquetReader>>(bind_data.files.size(), nullptr);
|
372
|
+
} else {
|
373
|
+
std::fill(result->file_states.begin(), result->file_states.end(), ParquetFileState::OPEN);
|
370
374
|
}
|
371
375
|
if (bind_data.initial_reader) {
|
372
376
|
result->initial_reader = std::move(bind_data.initial_reader);
|
@@ -378,6 +382,7 @@ public:
|
|
378
382
|
make_shared<ParquetReader>(context, bind_data.files[0], bind_data.parquet_options);
|
379
383
|
result->readers[0] = result->initial_reader;
|
380
384
|
}
|
385
|
+
result->file_states[0] = ParquetFileState::OPEN;
|
381
386
|
}
|
382
387
|
for (auto &reader : result->readers) {
|
383
388
|
if (!reader) {
|
@@ -511,7 +516,7 @@ public:
|
|
511
516
|
|
512
517
|
D_ASSERT(parallel_state.initial_reader);
|
513
518
|
|
514
|
-
if (parallel_state.
|
519
|
+
if (parallel_state.file_states[parallel_state.file_index] == ParquetFileState::OPEN) {
|
515
520
|
if (parallel_state.row_group_index <
|
516
521
|
parallel_state.readers[parallel_state.file_index]->NumRowGroups()) {
|
517
522
|
// The current reader has rowgroups left to be scanned
|
@@ -523,12 +528,14 @@ public:
|
|
523
528
|
parallel_state.row_group_index++;
|
524
529
|
return true;
|
525
530
|
} else {
|
531
|
+
// Close current file
|
532
|
+
parallel_state.file_states[parallel_state.file_index] = ParquetFileState::CLOSED;
|
533
|
+
parallel_state.readers[parallel_state.file_index] = nullptr;
|
534
|
+
|
526
535
|
// Set state to the next file
|
527
536
|
parallel_state.file_index++;
|
528
537
|
parallel_state.row_group_index = 0;
|
529
538
|
|
530
|
-
parallel_state.readers[parallel_state.file_index - 1] = nullptr;
|
531
|
-
|
532
539
|
if (parallel_state.file_index >= bind_data.files.size()) {
|
533
540
|
return false;
|
534
541
|
}
|
@@ -541,8 +548,7 @@ public:
|
|
541
548
|
}
|
542
549
|
|
543
550
|
// Check if the current file is being opened, in that case we need to wait for it.
|
544
|
-
if (
|
545
|
-
parallel_state.file_opening[parallel_state.file_index]) {
|
551
|
+
if (parallel_state.file_states[parallel_state.file_index] == ParquetFileState::OPENING) {
|
546
552
|
WaitForFile(parallel_state.file_index, parallel_state, parallel_lock);
|
547
553
|
}
|
548
554
|
}
|
@@ -573,7 +579,8 @@ public:
|
|
573
579
|
// - the thread opening the file has failed
|
574
580
|
// - the file was somehow scanned till the end while we were waiting
|
575
581
|
if (parallel_state.file_index >= parallel_state.readers.size() ||
|
576
|
-
parallel_state.
|
582
|
+
parallel_state.file_states[parallel_state.file_index] != ParquetFileState::OPENING ||
|
583
|
+
parallel_state.error_opening_file) {
|
577
584
|
return;
|
578
585
|
}
|
579
586
|
}
|
@@ -583,10 +590,12 @@ public:
|
|
583
590
|
static bool TryOpenNextFile(ClientContext &context, const ParquetReadBindData &bind_data,
|
584
591
|
ParquetReadLocalState &scan_data, ParquetReadGlobalState ¶llel_state,
|
585
592
|
unique_lock<mutex> ¶llel_lock) {
|
586
|
-
|
587
|
-
|
593
|
+
const auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
|
594
|
+
const auto file_index_limit = MinValue<idx_t>(parallel_state.file_index + num_threads, bind_data.files.size());
|
595
|
+
for (idx_t i = parallel_state.file_index; i < file_index_limit; i++) {
|
596
|
+
if (parallel_state.file_states[i] == ParquetFileState::UNOPENED) {
|
588
597
|
string file = bind_data.files[i];
|
589
|
-
parallel_state.
|
598
|
+
parallel_state.file_states[i] = ParquetFileState::OPENING;
|
590
599
|
auto pq_options = parallel_state.initial_reader->parquet_options;
|
591
600
|
|
592
601
|
// Now we switch which lock we are holding, instead of locking the global state, we grab the lock on
|
@@ -611,6 +620,7 @@ public:
|
|
611
620
|
// Now re-lock the state and add the reader
|
612
621
|
parallel_lock.lock();
|
613
622
|
parallel_state.readers[i] = reader;
|
623
|
+
parallel_state.file_states[i] = ParquetFileState::OPEN;
|
614
624
|
|
615
625
|
return true;
|
616
626
|
}
|
package/src/duckdb/src/common/crypto/md5.cpp
CHANGED
@@ -20,6 +20,7 @@
|
|
20
20
|
* will fill a supplied 16-byte array with the digest.
|
21
21
|
*/
|
22
22
|
#include "duckdb/common/crypto/md5.hpp"
|
23
|
+
#include "mbedtls_wrapper.hpp"
|
23
24
|
|
24
25
|
namespace duckdb {
|
25
26
|
|
@@ -236,21 +237,10 @@ void MD5Context::Finish(data_ptr_t out_digest) {
|
|
236
237
|
memcpy(out_digest, buf, 16);
|
237
238
|
}
|
238
239
|
|
239
|
-
void MD5Context::DigestToBase16(const_data_ptr_t digest, char *zbuf) {
|
240
|
-
static char const HEX_CODES[] = "0123456789abcdef";
|
241
|
-
int i, j;
|
242
|
-
|
243
|
-
for (j = i = 0; i < 16; i++) {
|
244
|
-
int a = digest[i];
|
245
|
-
zbuf[j++] = HEX_CODES[(a >> 4) & 0xf];
|
246
|
-
zbuf[j++] = HEX_CODES[a & 0xf];
|
247
|
-
}
|
248
|
-
}
|
249
|
-
|
250
240
|
void MD5Context::FinishHex(char *out_digest) {
|
251
241
|
data_t digest[MD5_HASH_LENGTH_BINARY];
|
252
242
|
Finish(digest);
|
253
|
-
|
243
|
+
duckdb_mbedtls::MbedTlsWrapper::ToBase16(reinterpret_cast<char *>(digest), out_digest, MD5_HASH_LENGTH_BINARY);
|
254
244
|
}
|
255
245
|
|
256
246
|
string MD5Context::FinishHex() {
|
package/src/duckdb/src/common/file_buffer.cpp
CHANGED
@@ -68,7 +68,7 @@ FileBuffer::MemoryRequirement FileBuffer::CalculateMemory(uint64_t user_size) {
|
|
68
68
|
result.alloc_size = user_size;
|
69
69
|
} else {
|
70
70
|
result.header_size = Storage::BLOCK_HEADER_SIZE;
|
71
|
-
result.alloc_size = AlignValue<
|
71
|
+
result.alloc_size = AlignValue<idx_t, Storage::SECTOR_SIZE>(result.header_size + user_size);
|
72
72
|
}
|
73
73
|
return result;
|
74
74
|
}
|
package/src/duckdb/src/core_functions/function_list.cpp
CHANGED
@@ -281,6 +281,7 @@ static StaticFunctionDefinition internal_functions[] = {
|
|
281
281
|
DUCKDB_AGGREGATE_FUNCTION(StandardErrorOfTheMeanFun),
|
282
282
|
DUCKDB_SCALAR_FUNCTION(SetBitFun),
|
283
283
|
DUCKDB_SCALAR_FUNCTION(SetseedFun),
|
284
|
+
DUCKDB_SCALAR_FUNCTION(SHA256Fun),
|
284
285
|
DUCKDB_SCALAR_FUNCTION_SET(SignFun),
|
285
286
|
DUCKDB_SCALAR_FUNCTION_SET(SignBitFun),
|
286
287
|
DUCKDB_SCALAR_FUNCTION(SinFun),
|
package/src/duckdb/src/core_functions/scalar/string/sha256.cpp
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#include "duckdb/common/exception.hpp"
|
2
|
+
#include "duckdb/common/vector_operations/unary_executor.hpp"
|
3
|
+
#include "duckdb/core_functions/scalar/string_functions.hpp"
|
4
|
+
#include "mbedtls_wrapper.hpp"
|
5
|
+
|
6
|
+
namespace duckdb {
|
7
|
+
|
8
|
+
struct SHA256Operator {
|
9
|
+
template <class INPUT_TYPE, class RESULT_TYPE>
|
10
|
+
static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
|
11
|
+
auto hash = StringVector::EmptyString(result, duckdb_mbedtls::MbedTlsWrapper::SHA256_HASH_LENGTH_TEXT);
|
12
|
+
|
13
|
+
duckdb_mbedtls::MbedTlsWrapper::SHA256State state;
|
14
|
+
state.AddString(input.GetString());
|
15
|
+
state.FinishHex(hash.GetDataWriteable());
|
16
|
+
|
17
|
+
hash.Finalize();
|
18
|
+
return hash;
|
19
|
+
}
|
20
|
+
};
|
21
|
+
|
22
|
+
static void SHA256Function(DataChunk &args, ExpressionState &state, Vector &result) {
|
23
|
+
auto &input = args.data[0];
|
24
|
+
|
25
|
+
UnaryExecutor::ExecuteString<string_t, string_t, SHA256Operator>(input, result, args.size());
|
26
|
+
}
|
27
|
+
|
28
|
+
ScalarFunction SHA256Fun::GetFunction() {
|
29
|
+
return ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, SHA256Function);
|
30
|
+
}
|
31
|
+
|
32
|
+
} // namespace duckdb
|
package/src/duckdb/src/function/cast/vector_cast_helpers.cpp
CHANGED
@@ -98,6 +98,9 @@ struct SplitStringListOperation {
|
|
98
98
|
child_start++;
|
99
99
|
return;
|
100
100
|
}
|
101
|
+
if (start_pos > pos) {
|
102
|
+
pos = start_pos;
|
103
|
+
}
|
101
104
|
child_data[child_start] = StringVector::AddString(child, buf + start_pos, pos - start_pos);
|
102
105
|
child_start++;
|
103
106
|
}
|
@@ -109,6 +112,7 @@ static bool SplitStringListInternal(const string_t &input, OP &state) {
|
|
109
112
|
idx_t len = input.GetSize();
|
110
113
|
idx_t lvl = 1;
|
111
114
|
idx_t pos = 0;
|
115
|
+
bool seen_value = false;
|
112
116
|
|
113
117
|
SkipWhitespace(buf, pos, len);
|
114
118
|
if (pos == len || buf[pos] != '[') {
|
@@ -132,9 +136,10 @@ static bool SplitStringListInternal(const string_t &input, OP &state) {
|
|
132
136
|
while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
|
133
137
|
trailing_whitespace++;
|
134
138
|
}
|
135
|
-
if (
|
139
|
+
if (buf[pos] != ']' || start_pos != pos || seen_value) {
|
136
140
|
state.HandleValue(buf, start_pos, pos - trailing_whitespace);
|
137
|
-
|
141
|
+
seen_value = true;
|
142
|
+
}
|
138
143
|
if (buf[pos] == ']') {
|
139
144
|
lvl--;
|
140
145
|
break;
|
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.2-dev3949"
|
2
|
+
#define DUCKDB_VERSION "0.8.2-dev4025"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "9698e9e6a8"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp
CHANGED
@@ -342,6 +342,15 @@ struct RtrimFun {
|
|
342
342
|
static ScalarFunctionSet GetFunctions();
|
343
343
|
};
|
344
344
|
|
345
|
+
struct SHA256Fun {
|
346
|
+
static constexpr const char *Name = "sha256";
|
347
|
+
static constexpr const char *Parameters = "value";
|
348
|
+
static constexpr const char *Description = "Returns the SHA256 hash of the value";
|
349
|
+
static constexpr const char *Example = "sha256('hello')";
|
350
|
+
|
351
|
+
static ScalarFunction GetFunction();
|
352
|
+
};
|
353
|
+
|
345
354
|
struct StringSplitFun {
|
346
355
|
static constexpr const char *Name = "string_split";
|
347
356
|
static constexpr const char *Parameters = "string,separator";
|
package/src/duckdb/src/include/duckdb/main/extension_helper.hpp
CHANGED
@@ -46,13 +46,16 @@ public:
|
|
46
46
|
static void InstallExtension(DBConfig &config, FileSystem &fs, const string &extension, bool force_install,
|
47
47
|
const string &respository = "");
|
48
48
|
static void LoadExternalExtension(ClientContext &context, const string &extension);
|
49
|
-
static void LoadExternalExtension(DatabaseInstance &db, FileSystem &fs, const string &extension
|
49
|
+
static void LoadExternalExtension(DatabaseInstance &db, FileSystem &fs, const string &extension,
|
50
|
+
optional_ptr<const ClientConfig> client_config);
|
50
51
|
|
51
52
|
//! Autoload an extension by name. Depending on the current settings, this will either load or install+load
|
52
53
|
static void AutoLoadExtension(ClientContext &context, const string &extension_name);
|
53
54
|
|
54
55
|
static string ExtensionDirectory(ClientContext &context);
|
55
56
|
static string ExtensionDirectory(DBConfig &config, FileSystem &fs);
|
57
|
+
static string ExtensionUrlTemplate(optional_ptr<const ClientConfig> config, const string &repository);
|
58
|
+
static string ExtensionFinalizeUrlTemplate(const string &url, const string &name);
|
56
59
|
|
57
60
|
static idx_t DefaultExtensionCount();
|
58
61
|
static DefaultExtension GetDefaultExtension(idx_t index);
|
@@ -101,9 +104,10 @@ private:
|
|
101
104
|
const string &repository);
|
102
105
|
static const vector<string> PathComponents();
|
103
106
|
static bool AllowAutoInstall(const string &extension);
|
104
|
-
static ExtensionInitResult InitialLoad(DBConfig &config, FileSystem &fs, const string &extension
|
107
|
+
static ExtensionInitResult InitialLoad(DBConfig &config, FileSystem &fs, const string &extension,
|
108
|
+
optional_ptr<const ClientConfig> client_config);
|
105
109
|
static bool TryInitialLoad(DBConfig &config, FileSystem &fs, const string &extension, ExtensionInitResult &result,
|
106
|
-
string &error);
|
110
|
+
string &error, optional_ptr<const ClientConfig> client_config);
|
107
111
|
//! For tagged releases we use the tag, else we use the git commit hash
|
108
112
|
static const string GetVersionDirectoryName();
|
109
113
|
//! Version tags occur with and without 'v', tag in extension path is always with 'v'
|
package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp
CHANGED
@@ -30,7 +30,7 @@ public:
|
|
30
30
|
//! The type of the join (INNER, OUTER, etc...)
|
31
31
|
JoinType join_type;
|
32
32
|
//! Table index used to refer to the MARK column (in case of a MARK join)
|
33
|
-
idx_t mark_index;
|
33
|
+
idx_t mark_index {};
|
34
34
|
//! The columns of the LHS that are output by the join
|
35
35
|
vector<idx_t> left_projection_map;
|
36
36
|
//! The columns of the RHS that are output by the join
|
package/src/duckdb/src/main/database.cpp
CHANGED
@@ -234,7 +234,7 @@ void DatabaseInstance::Initialize(const char *database_path, DBConfig *user_conf
|
|
234
234
|
if (!config.file_system) {
|
235
235
|
throw InternalException("No file system!?");
|
236
236
|
}
|
237
|
-
ExtensionHelper::LoadExternalExtension(*this, *config.file_system, config.options.database_type);
|
237
|
+
ExtensionHelper::LoadExternalExtension(*this, *config.file_system, config.options.database_type, nullptr);
|
238
238
|
}
|
239
239
|
|
240
240
|
if (!config.options.unrecognized_options.empty()) {
|
package/src/duckdb/src/main/extension/extension_helper.cpp
CHANGED
@@ -12,6 +12,10 @@
|
|
12
12
|
#define DUCKDB_EXTENSION_ICU_LINKED false
|
13
13
|
#endif
|
14
14
|
|
15
|
+
#ifndef DUCKDB_EXTENSION_EXCEL_LINKED
|
16
|
+
#define DUCKDB_EXTENSION_EXCEL_LINKED false
|
17
|
+
#endif
|
18
|
+
|
15
19
|
#ifndef DUCKDB_EXTENSION_PARQUET_LINKED
|
16
20
|
#define DUCKDB_EXTENSION_PARQUET_LINKED false
|
17
21
|
#endif
|
@@ -54,6 +58,10 @@
|
|
54
58
|
#include "icu_extension.hpp"
|
55
59
|
#endif
|
56
60
|
|
61
|
+
#if DUCKDB_EXTENSION_EXCEL_LINKED
|
62
|
+
#include "excel_extension.hpp"
|
63
|
+
#endif
|
64
|
+
|
57
65
|
#if DUCKDB_EXTENSION_PARQUET_LINKED
|
58
66
|
#include "parquet_extension.hpp"
|
59
67
|
#endif
|
@@ -94,6 +102,7 @@ namespace duckdb {
|
|
94
102
|
//===--------------------------------------------------------------------===//
|
95
103
|
static DefaultExtension internal_extensions[] = {
|
96
104
|
{"icu", "Adds support for time zones and collations using the ICU library", DUCKDB_EXTENSION_ICU_LINKED},
|
105
|
+
{"excel", "Adds support for Excel-like format strings", DUCKDB_EXTENSION_EXCEL_LINKED},
|
97
106
|
{"parquet", "Adds support for reading and writing parquet files", DUCKDB_EXTENSION_PARQUET_LINKED},
|
98
107
|
{"tpch", "Adds TPC-H data generation and query support", DUCKDB_EXTENSION_TPCH_LINKED},
|
99
108
|
{"tpcds", "Adds TPC-DS data generation and query support", DUCKDB_EXTENSION_TPCDS_LINKED},
|
@@ -101,12 +110,17 @@ static DefaultExtension internal_extensions[] = {
|
|
101
110
|
{"httpfs", "Adds support for reading and writing files over a HTTP(S) connection", DUCKDB_EXTENSION_HTTPFS_LINKED},
|
102
111
|
{"json", "Adds support for JSON operations", DUCKDB_EXTENSION_JSON_LINKED},
|
103
112
|
{"jemalloc", "Overwrites system allocator with JEMalloc", DUCKDB_EXTENSION_JEMALLOC_LINKED},
|
104
|
-
{"autocomplete", "
|
113
|
+
{"autocomplete", "Adds support for autocomplete in the shell", DUCKDB_EXTENSION_AUTOCOMPLETE_LINKED},
|
105
114
|
{"motherduck", "Enables motherduck integration with the system", false},
|
106
115
|
{"sqlite_scanner", "Adds support for reading SQLite database files", false},
|
107
116
|
{"postgres_scanner", "Adds support for reading from a Postgres database", false},
|
108
117
|
{"inet", "Adds support for IP-related data types and functions", false},
|
109
118
|
{"spatial", "Geospatial extension that adds support for working with spatial data and functions", false},
|
119
|
+
{"substrait", "Adds support for the Substrait integration", false},
|
120
|
+
{"aws", "Provides features that depend on the AWS SDK", false},
|
121
|
+
{"azure", "Adds a filesystem abstraction for Azure blob storage to DuckDB", false},
|
122
|
+
{"iceberg", "Adds support for Apache Iceberg", false},
|
123
|
+
{"visualizer", "Creates an HTML-based visualization of the query plan", false},
|
110
124
|
{nullptr, nullptr, false}};
|
111
125
|
|
112
126
|
idx_t ExtensionHelper::DefaultExtensionCount() {
|
package/src/duckdb/src/main/extension/extension_install.cpp
CHANGED
@@ -45,7 +45,7 @@ const vector<string> ExtensionHelper::PathComponents() {
|
|
45
45
|
|
46
46
|
string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs) {
|
47
47
|
#ifdef WASM_LOADABLE_EXTENSIONS
|
48
|
-
|
48
|
+
static_assert(0, "ExtensionDirectory functionality is not supported in duckdb-wasm");
|
49
49
|
#endif
|
50
50
|
string extension_directory;
|
51
51
|
if (!config.options.extension_directory.empty()) { // create the extension directory if not present
|
@@ -157,6 +157,32 @@ void WriteExtensionFileToDisk(FileSystem &fs, const string &path, void *data, id
|
|
157
157
|
target_file.reset();
|
158
158
|
}
|
159
159
|
|
160
|
+
string ExtensionHelper::ExtensionUrlTemplate(optional_ptr<const ClientConfig> client_config, const string &repository) {
|
161
|
+
string default_endpoint = "http://extensions.duckdb.org";
|
162
|
+
string versioned_path = "/${REVISION}/${PLATFORM}/${NAME}.duckdb_extension.gz";
|
163
|
+
#ifdef WASM_LOADABLE_EXTENSIONS
|
164
|
+
versioned_path = "/duckdb-wasm" + versioned_path;
|
165
|
+
#endif
|
166
|
+
string custom_endpoint = client_config ? client_config->custom_extension_repo : string();
|
167
|
+
string endpoint;
|
168
|
+
if (!repository.empty()) {
|
169
|
+
endpoint = repository;
|
170
|
+
} else if (!custom_endpoint.empty()) {
|
171
|
+
endpoint = custom_endpoint;
|
172
|
+
} else {
|
173
|
+
endpoint = default_endpoint;
|
174
|
+
}
|
175
|
+
string url_template = endpoint + versioned_path;
|
176
|
+
return url_template;
|
177
|
+
}
|
178
|
+
|
179
|
+
string ExtensionHelper::ExtensionFinalizeUrlTemplate(const string &url_template, const string &extension_name) {
|
180
|
+
auto url = StringUtil::Replace(url_template, "${REVISION}", GetVersionDirectoryName());
|
181
|
+
url = StringUtil::Replace(url, "${PLATFORM}", DuckDB::Platform());
|
182
|
+
url = StringUtil::Replace(url, "${NAME}", extension_name);
|
183
|
+
return url;
|
184
|
+
}
|
185
|
+
|
160
186
|
void ExtensionHelper::InstallExtensionInternal(DBConfig &config, ClientConfig *client_config, FileSystem &fs,
|
161
187
|
const string &local_path, const string &extension, bool force_install,
|
162
188
|
const string &repository) {
|
@@ -197,27 +223,14 @@ void ExtensionHelper::InstallExtensionInternal(DBConfig &config, ClientConfig *c
|
|
197
223
|
throw BinderException("Remote extension installation is disabled through configuration");
|
198
224
|
#else
|
199
225
|
|
200
|
-
string
|
201
|
-
string custom_endpoint = client_config ? client_config->custom_extension_repo : string();
|
202
|
-
string endpoint;
|
203
|
-
if (!repository.empty()) {
|
204
|
-
endpoint = repository;
|
205
|
-
} else if (!custom_endpoint.empty()) {
|
206
|
-
endpoint = custom_endpoint;
|
207
|
-
} else {
|
208
|
-
endpoint = "http://extensions.duckdb.org";
|
209
|
-
}
|
210
|
-
|
211
|
-
string url_template = endpoint + versioned_path;
|
226
|
+
string url_template = ExtensionUrlTemplate(client_config, repository);
|
212
227
|
|
213
228
|
if (is_http_url) {
|
214
229
|
url_template = extension;
|
215
230
|
extension_name = "";
|
216
231
|
}
|
217
232
|
|
218
|
-
|
219
|
-
url = StringUtil::Replace(url, "${PLATFORM}", DuckDB::Platform());
|
220
|
-
url = StringUtil::Replace(url, "${NAME}", extension_name);
|
233
|
+
string url = ExtensionFinalizeUrlTemplate(url_template, extension_name);
|
221
234
|
|
222
235
|
string no_http = StringUtil::Replace(url, "http://", "");
|
223
236
|
|
@@ -227,7 +240,7 @@ void ExtensionHelper::InstallExtensionInternal(DBConfig &config, ClientConfig *c
|
|
227
240
|
}
|
228
241
|
|
229
242
|
// Special case to install extension from a local file, useful for testing
|
230
|
-
if (!StringUtil::Contains(
|
243
|
+
if (!StringUtil::Contains(url_template, "http://")) {
|
231
244
|
string file = fs.ConvertSeparators(url);
|
232
245
|
if (!fs.FileExists(file)) {
|
233
246
|
// check for non-gzipped variant
|
package/src/duckdb/src/main/extension/extension_load.cpp
CHANGED
@@ -58,7 +58,8 @@ static void ComputeSHA256FileSegment(FileHandle *handle, const idx_t start, cons
|
|
58
58
|
#endif
|
59
59
|
|
60
60
|
bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const string &extension,
|
61
|
-
ExtensionInitResult &result, string &error
|
61
|
+
ExtensionInitResult &result, string &error,
|
62
|
+
optional_ptr<const ClientConfig> client_config) {
|
62
63
|
#ifdef DUCKDB_DISABLE_EXTENSION_LOAD
|
63
64
|
throw PermissionException("Loading external extensions is disabled through a compile time flag");
|
64
65
|
#else
|
@@ -69,6 +70,30 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const str
|
|
69
70
|
|
70
71
|
// shorthand case
|
71
72
|
if (!ExtensionHelper::IsFullPath(extension)) {
|
73
|
+
#ifdef WASM_LOADABLE_EXTENSIONS
|
74
|
+
string url_template = ExtensionUrlTemplate(client_config, "");
|
75
|
+
string url = ExtensionFinalizeUrlTemplate(url_template, extension_name);
|
76
|
+
|
77
|
+
char *str = (char *)EM_ASM_PTR(
|
78
|
+
{
|
79
|
+
var jsString = ((typeof runtime == = 'object') && runtime &&
|
80
|
+
(typeof runtime.whereToLoad == = 'function') && runtime.whereToLoad)
|
81
|
+
? runtime.whereToLoad(UTF8ToString($0))
|
82
|
+
: (UTF8ToString($1));
|
83
|
+
var lengthBytes = lengthBytesUTF8(jsString) + 1;
|
84
|
+
// 'jsString.length' would return the length of the string as UTF-16
|
85
|
+
// units, but Emscripten C strings operate as UTF-8.
|
86
|
+
var stringOnWasmHeap = _malloc(lengthBytes);
|
87
|
+
stringToUTF8(jsString, stringOnWasmHeap, lengthBytes);
|
88
|
+
return stringOnWasmHeap;
|
89
|
+
},
|
90
|
+
filename.c_str(), url.c_str());
|
91
|
+
std::string address(str);
|
92
|
+
free(str);
|
93
|
+
|
94
|
+
filename = address;
|
95
|
+
#else
|
96
|
+
|
72
97
|
string local_path =
|
73
98
|
!config.options.extension_directory.empty() ? config.options.extension_directory : fs.GetHomeDirectory();
|
74
99
|
|
@@ -82,6 +107,7 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const str
|
|
82
107
|
}
|
83
108
|
string extension_name = ApplyExtensionAlias(extension);
|
84
109
|
filename = fs.JoinPath(local_path, extension_name + ".duckdb_extension");
|
110
|
+
#endif
|
85
111
|
}
|
86
112
|
if (!fs.FileExists(filename)) {
|
87
113
|
string message;
|
@@ -216,17 +242,18 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const str
|
|
216
242
|
#endif
|
217
243
|
}
|
218
244
|
|
219
|
-
ExtensionInitResult ExtensionHelper::InitialLoad(DBConfig &config, FileSystem &fs, const string &extension
|
245
|
+
ExtensionInitResult ExtensionHelper::InitialLoad(DBConfig &config, FileSystem &fs, const string &extension,
|
246
|
+
optional_ptr<const ClientConfig> client_config) {
|
220
247
|
string error;
|
221
248
|
ExtensionInitResult result;
|
222
|
-
if (!TryInitialLoad(config, fs, extension, result, error)) {
|
249
|
+
if (!TryInitialLoad(config, fs, extension, result, error, client_config)) {
|
223
250
|
if (!ExtensionHelper::AllowAutoInstall(extension)) {
|
224
251
|
throw IOException(error);
|
225
252
|
}
|
226
253
|
// the extension load failed - try installing the extension
|
227
254
|
ExtensionHelper::InstallExtension(config, fs, extension, false);
|
228
255
|
// try loading again
|
229
|
-
if (!TryInitialLoad(config, fs, extension, result, error)) {
|
256
|
+
if (!TryInitialLoad(config, fs, extension, result, error, client_config)) {
|
230
257
|
throw IOException(error);
|
231
258
|
}
|
232
259
|
}
|
@@ -254,14 +281,15 @@ string ExtensionHelper::GetExtensionName(const string &original_name) {
|
|
254
281
|
return ExtensionHelper::ApplyExtensionAlias(splits.front());
|
255
282
|
}
|
256
283
|
|
257
|
-
void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileSystem &fs, const string &extension
|
284
|
+
void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileSystem &fs, const string &extension,
|
285
|
+
optional_ptr<const ClientConfig> client_config) {
|
258
286
|
if (db.ExtensionIsLoaded(extension)) {
|
259
287
|
return;
|
260
288
|
}
|
261
289
|
#ifdef DUCKDB_DISABLE_EXTENSION_LOAD
|
262
290
|
throw PermissionException("Loading external extensions is disabled through a compile time flag");
|
263
291
|
#else
|
264
|
-
auto res = InitialLoad(DBConfig::GetConfig(db), fs, extension);
|
292
|
+
auto res = InitialLoad(DBConfig::GetConfig(db), fs, extension, client_config);
|
265
293
|
auto init_fun_name = res.basename + "_init";
|
266
294
|
|
267
295
|
ext_init_fun_t init_fun;
|
@@ -279,7 +307,8 @@ void ExtensionHelper::LoadExternalExtension(DatabaseInstance &db, FileSystem &fs
|
|
279
307
|
}
|
280
308
|
|
281
309
|
void ExtensionHelper::LoadExternalExtension(ClientContext &context, const string &extension) {
|
282
|
-
LoadExternalExtension(DatabaseInstance::GetDatabase(context), FileSystem::GetFileSystem(context), extension
|
310
|
+
LoadExternalExtension(DatabaseInstance::GetDatabase(context), FileSystem::GetFileSystem(context), extension,
|
311
|
+
&ClientConfig::GetConfig(context));
|
283
312
|
}
|
284
313
|
|
285
314
|
string ExtensionHelper::ExtractExtensionPrefixFromPath(const string &path) {
|
package/src/duckdb/src/storage/standard_buffer_manager.cpp
CHANGED
@@ -3,11 +3,11 @@
|
|
3
3
|
#include "duckdb/common/allocator.hpp"
|
4
4
|
#include "duckdb/common/exception.hpp"
|
5
5
|
#include "duckdb/common/set.hpp"
|
6
|
-
#include "duckdb/storage/in_memory_block_manager.hpp"
|
7
|
-
#include "duckdb/storage/storage_manager.hpp"
|
8
6
|
#include "duckdb/main/attached_database.hpp"
|
9
7
|
#include "duckdb/main/database.hpp"
|
10
8
|
#include "duckdb/storage/buffer/buffer_pool.hpp"
|
9
|
+
#include "duckdb/storage/in_memory_block_manager.hpp"
|
10
|
+
#include "duckdb/storage/storage_manager.hpp"
|
11
11
|
|
12
12
|
namespace duckdb {
|
13
13
|
|
@@ -329,12 +329,13 @@ private:
|
|
329
329
|
};
|
330
330
|
|
331
331
|
class TemporaryFileHandle {
|
332
|
-
constexpr static idx_t
|
332
|
+
constexpr static idx_t MAX_ALLOWED_INDEX_BASE = 4000;
|
333
333
|
|
334
334
|
public:
|
335
|
-
TemporaryFileHandle(DatabaseInstance &db, const string &temp_directory, idx_t index)
|
336
|
-
: db(db), file_index(index),
|
337
|
-
|
335
|
+
TemporaryFileHandle(idx_t temp_file_count, DatabaseInstance &db, const string &temp_directory, idx_t index)
|
336
|
+
: max_allowed_index((1 << temp_file_count) * MAX_ALLOWED_INDEX_BASE), db(db), file_index(index),
|
337
|
+
path(FileSystem::GetFileSystem(db).JoinPath(temp_directory,
|
338
|
+
"duckdb_temp_storage-" + to_string(index) + ".tmp")) {
|
338
339
|
}
|
339
340
|
|
340
341
|
public:
|
@@ -348,7 +349,7 @@ public:
|
|
348
349
|
public:
|
349
350
|
TemporaryFileIndex TryGetBlockIndex() {
|
350
351
|
TemporaryFileLock lock(file_lock);
|
351
|
-
if (index_manager.GetMaxIndex() >=
|
352
|
+
if (index_manager.GetMaxIndex() >= max_allowed_index && index_manager.HasFreeBlocks()) {
|
352
353
|
// file is at capacity
|
353
354
|
return TemporaryFileIndex();
|
354
355
|
}
|
@@ -426,6 +427,7 @@ private:
|
|
426
427
|
}
|
427
428
|
|
428
429
|
private:
|
430
|
+
const idx_t max_allowed_index;
|
429
431
|
DatabaseInstance &db;
|
430
432
|
unique_ptr<FileHandle> handle;
|
431
433
|
idx_t file_index;
|
@@ -467,7 +469,7 @@ public:
|
|
467
469
|
if (!handle) {
|
468
470
|
// no existing handle to write to; we need to create & open a new file
|
469
471
|
auto new_file_index = index_manager.GetNewBlockIndex();
|
470
|
-
auto new_file = make_uniq<TemporaryFileHandle>(db, temp_directory, new_file_index);
|
472
|
+
auto new_file = make_uniq<TemporaryFileHandle>(files.size(), db, temp_directory, new_file_index);
|
471
473
|
handle = new_file.get();
|
472
474
|
files[new_file_index] = std::move(new_file);
|
473
475
|
|
@@ -17,8 +17,10 @@ public:
|
|
17
17
|
static std::string ComputeSha256Hash(const std::string& file_content);
|
18
18
|
static bool IsValidSha256Signature(const std::string& pubkey, const std::string& signature, const std::string& sha256_hash);
|
19
19
|
static void Hmac256(const char* key, size_t key_len, const char* message, size_t message_len, char* out);
|
20
|
+
static void ToBase16(char *in, char *out, size_t len);
|
20
21
|
|
21
|
-
static constexpr size_t
|
22
|
+
static constexpr size_t SHA256_HASH_LENGTH_BYTES = 32;
|
23
|
+
static constexpr size_t SHA256_HASH_LENGTH_TEXT = 64;
|
22
24
|
|
23
25
|
class SHA256State {
|
24
26
|
public:
|
@@ -26,6 +28,7 @@ public:
|
|
26
28
|
~SHA256State();
|
27
29
|
void AddString(const std::string & str);
|
28
30
|
std::string Finalize();
|
31
|
+
void FinishHex(char *out);
|
29
32
|
private:
|
30
33
|
void *sha_context;
|
31
34
|
};
|
@@ -38,7 +38,7 @@ void MbedTlsWrapper::ComputeSha256Hash(const char* in, size_t in_len, char* out)
|
|
38
38
|
|
39
39
|
string MbedTlsWrapper::ComputeSha256Hash(const string& file_content) {
|
40
40
|
string hash;
|
41
|
-
hash.resize(MbedTlsWrapper::
|
41
|
+
hash.resize(MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
|
42
42
|
ComputeSha256Hash(file_content.data(), file_content.size(), (char*)hash.data());
|
43
43
|
return hash;
|
44
44
|
}
|
@@ -83,6 +83,17 @@ void MbedTlsWrapper::Hmac256(const char* key, size_t key_len, const char* messag
|
|
83
83
|
mbedtls_md_free(&hmac_ctx);
|
84
84
|
}
|
85
85
|
|
86
|
+
void MbedTlsWrapper::ToBase16(char *in, char *out, size_t len) {
|
87
|
+
static char const HEX_CODES[] = "0123456789abcdef";
|
88
|
+
size_t i, j;
|
89
|
+
|
90
|
+
for (j = i = 0; i < len; i++) {
|
91
|
+
int a = in[i];
|
92
|
+
out[j++] = HEX_CODES[(a >> 4) & 0xf];
|
93
|
+
out[j++] = HEX_CODES[a & 0xf];
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
86
97
|
MbedTlsWrapper::SHA256State::SHA256State() : sha_context(new mbedtls_sha256_context()) {
|
87
98
|
mbedtls_sha256_init((mbedtls_sha256_context*)sha_context);
|
88
99
|
|
@@ -104,7 +115,7 @@ void MbedTlsWrapper::SHA256State::AddString(const std::string & str) {
|
|
104
115
|
|
105
116
|
std::string MbedTlsWrapper::SHA256State::Finalize() {
|
106
117
|
string hash;
|
107
|
-
hash.resize(MbedTlsWrapper::
|
118
|
+
hash.resize(MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
|
108
119
|
|
109
120
|
if (mbedtls_sha256_finish((mbedtls_sha256_context*)sha_context, (unsigned char*)hash.data())) {
|
110
121
|
throw std::runtime_error("SHA256 Error");
|
@@ -112,3 +123,14 @@ std::string MbedTlsWrapper::SHA256State::Finalize() {
|
|
112
123
|
|
113
124
|
return hash;
|
114
125
|
}
|
126
|
+
|
127
|
+
void MbedTlsWrapper::SHA256State::FinishHex(char *out) {
|
128
|
+
string hash;
|
129
|
+
hash.resize(MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
|
130
|
+
|
131
|
+
if (mbedtls_sha256_finish((mbedtls_sha256_context *)sha_context, (unsigned char *)hash.data())) {
|
132
|
+
throw std::runtime_error("SHA256 Error");
|
133
|
+
}
|
134
|
+
|
135
|
+
MbedTlsWrapper::ToBase16(const_cast<char *>(hash.c_str()), out, MbedTlsWrapper::SHA256_HASH_LENGTH_BYTES);
|
136
|
+
}
|
@@ -348,9 +348,11 @@
|
|
348
348
|
|
349
349
|
#include "extension/icu/third_party/icu/i18n/wintzimpl.cpp"
|
350
350
|
|
351
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-
|
351
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp"
|
352
352
|
|
353
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-
|
353
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
|
354
|
+
|
355
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp"
|
354
356
|
|
355
357
|
#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp"
|
356
358
|
|
@@ -358,7 +360,5 @@
|
|
358
360
|
|
359
361
|
#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp"
|
360
362
|
|
361
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-
|
362
|
-
|
363
|
-
#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp"
|
363
|
+
#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp"
|
364
364
|
|