duckdb 0.5.1-dev225.0 → 0.5.1-dev240.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +80 -36
- package/src/duckdb.hpp +2 -2
- package/src/parquet-amalgamation.cpp +37503 -37503
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -39580,7 +39580,7 @@ public:
|
|
|
39580
39580
|
namespace duckdb {
|
|
39581
39581
|
|
|
39582
39582
|
enum class UnicodeType { INVALID, ASCII, UNICODE };
|
|
39583
|
-
enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE };
|
|
39583
|
+
enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE, INVALID_UNICODE };
|
|
39584
39584
|
|
|
39585
39585
|
class Utf8Proc {
|
|
39586
39586
|
public:
|
|
@@ -143368,6 +143368,7 @@ private:
|
|
|
143368
143368
|
|
|
143369
143369
|
|
|
143370
143370
|
|
|
143371
|
+
|
|
143371
143372
|
namespace duckdb {
|
|
143372
143373
|
|
|
143373
143374
|
class DeliminatorPlanUpdater : LogicalOperatorVisitor {
|
|
@@ -143395,7 +143396,15 @@ void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
|
|
|
143395
143396
|
cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
|
|
143396
143397
|
continue;
|
|
143397
143398
|
}
|
|
143398
|
-
|
|
143399
|
+
Expression *rhs = cond.right.get();
|
|
143400
|
+
while (rhs->type == ExpressionType::OPERATOR_CAST) {
|
|
143401
|
+
auto &cast = (BoundCastExpression &)*rhs;
|
|
143402
|
+
rhs = cast.child.get();
|
|
143403
|
+
}
|
|
143404
|
+
if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
|
|
143405
|
+
throw InternalException("Erorr in deliminator: expected a bound column reference");
|
|
143406
|
+
}
|
|
143407
|
+
auto &colref = (BoundColumnRefExpression &)*rhs;
|
|
143399
143408
|
if (projection_map.find(colref.binding) != projection_map.end()) {
|
|
143400
143409
|
// value on the right is a projection of removed DelimGet
|
|
143401
143410
|
for (idx_t i = 0; i < decs->size(); i++) {
|
|
@@ -260490,49 +260499,84 @@ static void AssignInvalidUTF8Reason(UnicodeInvalidReason *invalid_reason, size_t
|
|
|
260490
260499
|
}
|
|
260491
260500
|
}
|
|
260492
260501
|
|
|
260493
|
-
|
|
260494
|
-
|
|
260495
|
-
|
|
260496
|
-
|
|
260497
|
-
|
|
260498
|
-
|
|
260499
|
-
|
|
260500
|
-
|
|
260501
|
-
|
|
260502
|
-
|
|
260503
|
-
|
|
260504
|
-
|
|
260505
|
-
|
|
260506
|
-
|
|
260507
|
-
if ((s[++i] & 0xC0) != 0x80) {
|
|
260508
|
-
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260509
|
-
return UnicodeType::INVALID;
|
|
260510
|
-
}
|
|
260511
|
-
if ((c & 0xE0) == 0xC0) {
|
|
260512
|
-
continue;
|
|
260513
|
-
}
|
|
260514
|
-
if ((s[++i] & 0xC0) != 0x80) {
|
|
260515
|
-
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260516
|
-
return UnicodeType::INVALID;
|
|
260517
|
-
}
|
|
260518
|
-
if ((c & 0xF0) == 0xE0) {
|
|
260519
|
-
continue;
|
|
260520
|
-
}
|
|
260521
|
-
if ((s[++i] & 0xC0) != 0x80) {
|
|
260502
|
+
template <const int nextra_bytes, const int mask>
|
|
260503
|
+
static inline UnicodeType
|
|
260504
|
+
UTF8ExtraByteLoop(const int first_pos_seq, int utf8char, size_t& i,
|
|
260505
|
+
const char *s, const size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
|
|
260506
|
+
if ((len - i) < (nextra_bytes + 1)) {
|
|
260507
|
+
/* incomplete byte sequence */
|
|
260508
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260509
|
+
return UnicodeType::INVALID;
|
|
260510
|
+
}
|
|
260511
|
+
for (size_t j = 0 ; j < nextra_bytes; j++) {
|
|
260512
|
+
int c = (int) s[++i];
|
|
260513
|
+
/* now validate the extra bytes */
|
|
260514
|
+
if ((c & 0xC0) != 0x80) {
|
|
260515
|
+
/* extra byte is not in the format 10xxxxxx */
|
|
260522
260516
|
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260523
260517
|
return UnicodeType::INVALID;
|
|
260524
260518
|
}
|
|
260525
|
-
|
|
260526
|
-
|
|
260527
|
-
|
|
260528
|
-
|
|
260519
|
+
utf8char = (utf8char << 6) | (c & 0x3F);
|
|
260520
|
+
}
|
|
260521
|
+
if ((utf8char & mask) == 0) {
|
|
260522
|
+
/* invalid UTF-8 codepoint, not shortest possible */
|
|
260523
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
|
|
260529
260524
|
return UnicodeType::INVALID;
|
|
260530
260525
|
}
|
|
260526
|
+
if (utf8char > 0x10FFFF) {
|
|
260527
|
+
/* value not representable by Unicode */
|
|
260528
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
|
|
260529
|
+
return UnicodeType::INVALID;
|
|
260530
|
+
}
|
|
260531
|
+
if ((utf8char & 0x1FFF800) == 0xD800) {
|
|
260532
|
+
/* Unicode characters from U+D800 to U+DFFF are surrogate characters used by UTF-16 which are invalid in UTF-8 */
|
|
260533
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
|
|
260534
|
+
return UnicodeType::INVALID;
|
|
260535
|
+
}
|
|
260536
|
+
return UnicodeType::UNICODE;
|
|
260537
|
+
}
|
|
260538
|
+
|
|
260539
|
+
UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
|
|
260540
|
+
UnicodeType type = UnicodeType::ASCII;
|
|
260531
260541
|
|
|
260542
|
+
for (size_t i = 0; i < len; i++) {
|
|
260543
|
+
int c = (int) s[i];
|
|
260544
|
+
|
|
260545
|
+
if ((c & 0x80) == 0) {
|
|
260546
|
+
/* 1 byte sequence */
|
|
260547
|
+
if (c == '\0') {
|
|
260548
|
+
/* NULL byte not allowed */
|
|
260549
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
|
|
260550
|
+
return UnicodeType::INVALID;
|
|
260551
|
+
}
|
|
260552
|
+
} else {
|
|
260553
|
+
int first_pos_seq = i;
|
|
260554
|
+
|
|
260555
|
+
if ((c & 0xE0) == 0xC0) {
|
|
260556
|
+
/* 2 byte sequence */
|
|
260557
|
+
int utf8char = c & 0x1F;
|
|
260558
|
+
type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
260559
|
+
} else if ((c & 0xF0) == 0xE0) {
|
|
260560
|
+
/* 3 byte sequence */
|
|
260561
|
+
int utf8char = c & 0x0F;
|
|
260562
|
+
type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
260563
|
+
} else if ((c & 0xF8) == 0xF0) {
|
|
260564
|
+
/* 4 byte sequence */
|
|
260565
|
+
int utf8char = c & 0x07;
|
|
260566
|
+
type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
|
|
260567
|
+
} else {
|
|
260568
|
+
/* invalid UTF-8 start byte */
|
|
260569
|
+
AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
|
|
260570
|
+
return UnicodeType::INVALID;
|
|
260571
|
+
}
|
|
260572
|
+
if (type == UnicodeType::INVALID) {
|
|
260573
|
+
return type;
|
|
260574
|
+
}
|
|
260575
|
+
}
|
|
260576
|
+
}
|
|
260532
260577
|
return type;
|
|
260533
260578
|
}
|
|
260534
260579
|
|
|
260535
|
-
|
|
260536
260580
|
char* Utf8Proc::Normalize(const char *s, size_t len) {
|
|
260537
260581
|
assert(s);
|
|
260538
260582
|
assert(Utf8Proc::Analyze(s, len) != UnicodeType::INVALID);
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.5.1-dev225"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "5e9609587"
|
|
15
|
+
#define DUCKDB_VERSION "v0.5.1-dev240"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|