duckdb 0.5.1-dev222.0 → 0.5.1-dev237.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +95 -52
- package/src/duckdb.hpp +4 -3
- package/src/parquet-amalgamation.cpp +37089 -37089
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -39580,7 +39580,7 @@ public:
|
|
|
39580
39580
|
namespace duckdb {
|
|
39581
39581
|
|
|
39582
39582
|
enum class UnicodeType { INVALID, ASCII, UNICODE };
|
|
39583
|
-
enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE };
|
|
39583
|
+
enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE, INVALID_UNICODE };
|
|
39584
39584
|
|
|
39585
39585
|
class Utf8Proc {
|
|
39586
39586
|
public:
|
|
@@ -80446,11 +80446,10 @@ unique_ptr<GlobalSinkState> PhysicalCreateTableAs::GetGlobalSinkState(ClientCont
|
|
|
80446
80446
|
SinkResultType PhysicalCreateTableAs::Sink(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p,
|
|
80447
80447
|
DataChunk &input) const {
|
|
80448
80448
|
auto &sink = (CreateTableAsGlobalState &)state;
|
|
80449
|
-
|
|
80450
|
-
|
|
80451
|
-
|
|
80452
|
-
|
|
80453
|
-
}
|
|
80449
|
+
D_ASSERT(sink.table);
|
|
80450
|
+
lock_guard<mutex> client_guard(sink.append_lock);
|
|
80451
|
+
sink.table->storage->Append(*sink.table, context.client, input);
|
|
80452
|
+
sink.inserted_count += input.size();
|
|
80454
80453
|
return SinkResultType::NEED_MORE_INPUT;
|
|
80455
80454
|
}
|
|
80456
80455
|
|
|
@@ -153461,10 +153460,10 @@ PendingExecutionResult Executor::ExecuteTask() {
|
|
|
153461
153460
|
lock_guard<mutex> elock(executor_lock);
|
|
153462
153461
|
pipelines.clear();
|
|
153463
153462
|
NextExecutor();
|
|
153464
|
-
if (!exceptions.empty()) { // LCOV_EXCL_START
|
|
153463
|
+
if (HasError()) { // LCOV_EXCL_START
|
|
153465
153464
|
// an exception has occurred executing one of the pipelines
|
|
153466
153465
|
execution_result = PendingExecutionResult::EXECUTION_ERROR;
|
|
153467
|
-
|
|
153466
|
+
ThrowException();
|
|
153468
153467
|
} // LCOV_EXCL_STOP
|
|
153469
153468
|
execution_result = PendingExecutionResult::RESULT_READY;
|
|
153470
153469
|
return execution_result;
|
|
@@ -153510,7 +153509,7 @@ vector<LogicalType> Executor::GetTypes() {
|
|
|
153510
153509
|
}
|
|
153511
153510
|
|
|
153512
153511
|
void Executor::PushError(PreservedError exception) {
|
|
153513
|
-
lock_guard<mutex> elock(executor_lock);
|
|
153512
|
+
lock_guard<mutex> elock(error_lock);
|
|
153514
153513
|
// interrupt execution of any other pipelines that belong to this executor
|
|
153515
153514
|
context.interrupted = true;
|
|
153516
153515
|
// push the exception onto the stack
|
|
@@ -153518,20 +153517,16 @@ void Executor::PushError(PreservedError exception) {
|
|
|
153518
153517
|
}
|
|
153519
153518
|
|
|
153520
153519
|
bool Executor::HasError() {
|
|
153521
|
-
lock_guard<mutex> elock(executor_lock);
|
|
153520
|
+
lock_guard<mutex> elock(error_lock);
|
|
153522
153521
|
return !exceptions.empty();
|
|
153523
153522
|
}
|
|
153524
153523
|
|
|
153525
153524
|
void Executor::ThrowException() {
|
|
153526
|
-
lock_guard<mutex> elock(executor_lock);
|
|
153527
|
-
ThrowExceptionInternal();
|
|
153528
|
-
}
|
|
153529
|
-
|
|
153530
|
-
void Executor::ThrowExceptionInternal() { // LCOV_EXCL_START
|
|
153525
|
+
lock_guard<mutex> elock(error_lock);
|
|
153531
153526
|
D_ASSERT(!exceptions.empty());
|
|
153532
153527
|
auto &entry = exceptions[0];
|
|
153533
153528
|
entry.Throw();
|
|
153534
|
-
}
|
|
153529
|
+
}
|
|
153535
153530
|
|
|
153536
153531
|
void Executor::Flush(ThreadContext &tcontext) {
|
|
153537
153532
|
profiler->Flush(tcontext.profiler);
|
|
@@ -153796,6 +153791,9 @@ void Pipeline::Ready() {
|
|
|
153796
153791
|
}
|
|
153797
153792
|
|
|
153798
153793
|
void Pipeline::Finalize(Event &event) {
|
|
153794
|
+
if (executor.HasError()) {
|
|
153795
|
+
return;
|
|
153796
|
+
}
|
|
153799
153797
|
D_ASSERT(ready);
|
|
153800
153798
|
try {
|
|
153801
153799
|
auto sink_state = sink->Finalize(*this, event, executor.context, *sink->sink_state);
|
|
@@ -153906,6 +153904,7 @@ void PipelineCompleteEvent::FinalizeFinish() {
|
|
|
153906
153904
|
} // namespace duckdb
|
|
153907
153905
|
|
|
153908
153906
|
|
|
153907
|
+
|
|
153909
153908
|
namespace duckdb {
|
|
153910
153909
|
|
|
153911
153910
|
PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEvent(move(pipeline_p)) {
|
|
@@ -153913,8 +153912,17 @@ PipelineEvent::PipelineEvent(shared_ptr<Pipeline> pipeline_p) : BasePipelineEven
|
|
|
153913
153912
|
|
|
153914
153913
|
void PipelineEvent::Schedule() {
|
|
153915
153914
|
auto event = shared_from_this();
|
|
153916
|
-
pipeline->Schedule(event);
|
|
153917
|
-
|
|
153915
|
+
auto &executor = pipeline->executor;
|
|
153916
|
+
try {
|
|
153917
|
+
pipeline->Schedule(event);
|
|
153918
|
+
D_ASSERT(total_tasks > 0);
|
|
153919
|
+
} catch (Exception &ex) {
|
|
153920
|
+
executor.PushError(PreservedError(ex));
|
|
153921
|
+
} catch (std::exception &ex) {
|
|
153922
|
+
executor.PushError(PreservedError(ex));
|
|
153923
|
+
} catch (...) { // LCOV_EXCL_START
|
|
153924
|
+
executor.PushError(PreservedError("Unknown exception in Finalize!"));
|
|
153925
|
+
} // LCOV_EXCL_STOP
|
|
153918
153926
|
}
|
|
153919
153927
|
|
|
153920
153928
|
void PipelineEvent::FinishEvent() {
|
|
@@ -260482,49 +260490,84 @@ static void AssignInvalidUTF8Reason(UnicodeInvalidReason *invalid_reason, size_t
|
|
|
260482
260490
|
}
|
|
260483
260491
|
}
|
|
260484
260492
|
|
|
260485
|
-
|
|
260486
|
-
|
|
260487
|
-
|
|
260488
|
-
|
|
260489
|
-
|
|
260490
|
-
|
|
260491
|
-
|
|
260492
|
-
|
|
260493
|
-
|
|
260494
|
-
|
|
260495
|
-
|
|
260496
|
-
|
|
260497
|
-
|
|
260498
|
-
|
|
260499
|
-
if ((s[++i] & 0xC0) != 0x80) {
|
|
260500
|
-
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260501
|
-
return UnicodeType::INVALID;
|
|
260502
|
-
}
|
|
260503
|
-
if ((c & 0xE0) == 0xC0) {
|
|
260504
|
-
continue;
|
|
260505
|
-
}
|
|
260506
|
-
if ((s[++i] & 0xC0) != 0x80) {
|
|
260507
|
-
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260508
|
-
return UnicodeType::INVALID;
|
|
260509
|
-
}
|
|
260510
|
-
if ((c & 0xF0) == 0xE0) {
|
|
260511
|
-
continue;
|
|
260512
|
-
}
|
|
260513
|
-
if ((s[++i] & 0xC0) != 0x80) {
|
|
260493
|
+
template <const int nextra_bytes, const int mask>
|
|
260494
|
+
static inline UnicodeType
|
|
260495
|
+
UTF8ExtraByteLoop(const int first_pos_seq, int utf8char, size_t& i,
|
|
260496
|
+
const char *s, const size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
|
|
260497
|
+
if ((len - i) < (nextra_bytes + 1)) {
|
|
260498
|
+
/* incomplete byte sequence */
|
|
260499
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260500
|
+
return UnicodeType::INVALID;
|
|
260501
|
+
}
|
|
260502
|
+
for (size_t j = 0 ; j < nextra_bytes; j++) {
|
|
260503
|
+
int c = (int) s[++i];
|
|
260504
|
+
/* now validate the extra bytes */
|
|
260505
|
+
if ((c & 0xC0) != 0x80) {
|
|
260506
|
+
/* extra byte is not in the format 10xxxxxx */
|
|
260514
260507
|
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260515
260508
|
return UnicodeType::INVALID;
|
|
260516
260509
|
}
|
|
260517
|
-
|
|
260518
|
-
|
|
260519
|
-
|
|
260520
|
-
|
|
260510
|
+
utf8char = (utf8char << 6) | (c & 0x3F);
|
|
260511
|
+
}
|
|
260512
|
+
if ((utf8char & mask) == 0) {
|
|
260513
|
+
/* invalid UTF-8 codepoint, not shortest possible */
|
|
260514
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
|
|
260515
|
+
return UnicodeType::INVALID;
|
|
260516
|
+
}
|
|
260517
|
+
if (utf8char > 0x10FFFF) {
|
|
260518
|
+
/* value not representable by Unicode */
|
|
260519
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
|
|
260520
|
+
return UnicodeType::INVALID;
|
|
260521
|
+
}
|
|
260522
|
+
if ((utf8char & 0x1FFF800) == 0xD800) {
|
|
260523
|
+
/* Unicode characters from U+D800 to U+DFFF are surrogate characters used by UTF-16 which are invalid in UTF-8 */
|
|
260524
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
|
|
260521
260525
|
return UnicodeType::INVALID;
|
|
260522
260526
|
}
|
|
260527
|
+
return UnicodeType::UNICODE;
|
|
260528
|
+
}
|
|
260529
|
+
|
|
260530
|
+
UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
|
|
260531
|
+
UnicodeType type = UnicodeType::ASCII;
|
|
260523
260532
|
|
|
260533
|
+
for (size_t i = 0; i < len; i++) {
|
|
260534
|
+
int c = (int) s[i];
|
|
260535
|
+
|
|
260536
|
+
if ((c & 0x80) == 0) {
|
|
260537
|
+
/* 1 byte sequence */
|
|
260538
|
+
if (c == '\0') {
|
|
260539
|
+
/* NULL byte not allowed */
|
|
260540
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
|
|
260541
|
+
return UnicodeType::INVALID;
|
|
260542
|
+
}
|
|
260543
|
+
} else {
|
|
260544
|
+
int first_pos_seq = i;
|
|
260545
|
+
|
|
260546
|
+
if ((c & 0xE0) == 0xC0) {
|
|
260547
|
+
/* 2 byte sequence */
|
|
260548
|
+
int utf8char = c & 0x1F;
|
|
260549
|
+
type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
260550
|
+
} else if ((c & 0xF0) == 0xE0) {
|
|
260551
|
+
/* 3 byte sequence */
|
|
260552
|
+
int utf8char = c & 0x0F;
|
|
260553
|
+
type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
260554
|
+
} else if ((c & 0xF8) == 0xF0) {
|
|
260555
|
+
/* 4 byte sequence */
|
|
260556
|
+
int utf8char = c & 0x07;
|
|
260557
|
+
type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
260558
|
+
} else {
|
|
260559
|
+
/* invalid UTF-8 start byte */
|
|
260560
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260561
|
+
return UnicodeType::INVALID;
|
|
260562
|
+
}
|
|
260563
|
+
if (type == UnicodeType::INVALID) {
|
|
260564
|
+
return type;
|
|
260565
|
+
}
|
|
260566
|
+
}
|
|
260567
|
+
}
|
|
260524
260568
|
return type;
|
|
260525
260569
|
}
|
|
260526
260570
|
|
|
260527
|
-
|
|
260528
260571
|
char* Utf8Proc::Normalize(const char *s, size_t len) {
|
|
260529
260572
|
assert(s);
|
|
260530
260573
|
assert(Utf8Proc::Analyze(s, len) != UnicodeType::INVALID);
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.1-dev222"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "a991beaf1"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.1-dev237"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -12959,6 +12959,7 @@ public:
|
|
|
12959
12959
|
|
|
12960
12960
|
//! Push a new error
|
|
12961
12961
|
void PushError(PreservedError exception);
|
|
12962
|
+
|
|
12962
12963
|
//! True if an error has been thrown
|
|
12963
12964
|
bool HasError();
|
|
12964
12965
|
//! Throw the exception that was pushed using PushError.
|
|
@@ -13006,13 +13007,13 @@ private:
|
|
|
13006
13007
|
|
|
13007
13008
|
void VerifyPipeline(Pipeline &pipeline);
|
|
13008
13009
|
void VerifyPipelines();
|
|
13009
|
-
void ThrowExceptionInternal();
|
|
13010
13010
|
|
|
13011
13011
|
private:
|
|
13012
13012
|
PhysicalOperator *physical_plan;
|
|
13013
13013
|
unique_ptr<PhysicalOperator> owned_plan;
|
|
13014
13014
|
|
|
13015
13015
|
mutex executor_lock;
|
|
13016
|
+
mutex error_lock;
|
|
13016
13017
|
//! The pipelines of the current query
|
|
13017
13018
|
vector<shared_ptr<Pipeline>> pipelines;
|
|
13018
13019
|
//! The root pipeline of the query
|