duckdb 0.5.1-dev225.0 → 0.5.1-dev240.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.1-dev225.0",
4
+ "version": "0.5.1-dev240.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -39580,7 +39580,7 @@ public:
39580
39580
  namespace duckdb {
39581
39581
 
39582
39582
  enum class UnicodeType { INVALID, ASCII, UNICODE };
39583
- enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE };
39583
+ enum class UnicodeInvalidReason { BYTE_MISMATCH, NULL_BYTE, INVALID_UNICODE };
39584
39584
 
39585
39585
  class Utf8Proc {
39586
39586
  public:
@@ -143368,6 +143368,7 @@ private:
143368
143368
 
143369
143369
 
143370
143370
 
143371
+
143371
143372
  namespace duckdb {
143372
143373
 
143373
143374
  class DeliminatorPlanUpdater : LogicalOperatorVisitor {
@@ -143395,7 +143396,15 @@ void DeliminatorPlanUpdater::VisitOperator(LogicalOperator &op) {
143395
143396
  cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
143396
143397
  continue;
143397
143398
  }
143398
- auto &colref = (BoundColumnRefExpression &)*cond.right;
143399
+ Expression *rhs = cond.right.get();
143400
+ while (rhs->type == ExpressionType::OPERATOR_CAST) {
143401
+ auto &cast = (BoundCastExpression &)*rhs;
143402
+ rhs = cast.child.get();
143403
+ }
143404
+ if (rhs->type != ExpressionType::BOUND_COLUMN_REF) {
143405
+ throw InternalException("Erorr in deliminator: expected a bound column reference");
143406
+ }
143407
+ auto &colref = (BoundColumnRefExpression &)*rhs;
143399
143408
  if (projection_map.find(colref.binding) != projection_map.end()) {
143400
143409
  // value on the right is a projection of removed DelimGet
143401
143410
  for (idx_t i = 0; i < decs->size(); i++) {
@@ -260490,49 +260499,84 @@ static void AssignInvalidUTF8Reason(UnicodeInvalidReason *invalid_reason, size_t
260490
260499
  }
260491
260500
  }
260492
260501
 
260493
- UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260494
- UnicodeType type = UnicodeType::ASCII;
260495
- char c;
260496
- for (size_t i = 0; i < len; i++) {
260497
- c = s[i];
260498
- if (c == '\0') {
260499
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
260500
- return UnicodeType::INVALID;
260501
- }
260502
- // 1 Byte / ASCII
260503
- if ((c & 0x80) == 0) {
260504
- continue;
260505
- }
260506
- type = UnicodeType::UNICODE;
260507
- if ((s[++i] & 0xC0) != 0x80) {
260508
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260509
- return UnicodeType::INVALID;
260510
- }
260511
- if ((c & 0xE0) == 0xC0) {
260512
- continue;
260513
- }
260514
- if ((s[++i] & 0xC0) != 0x80) {
260515
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260516
- return UnicodeType::INVALID;
260517
- }
260518
- if ((c & 0xF0) == 0xE0) {
260519
- continue;
260520
- }
260521
- if ((s[++i] & 0xC0) != 0x80) {
260502
+ template <const int nextra_bytes, const int mask>
260503
+ static inline UnicodeType
260504
+ UTF8ExtraByteLoop(const int first_pos_seq, int utf8char, size_t& i,
260505
+ const char *s, const size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260506
+ if ((len - i) < (nextra_bytes + 1)) {
260507
+ /* incomplete byte sequence */
260508
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::BYTE_MISMATCH);
260509
+ return UnicodeType::INVALID;
260510
+ }
260511
+ for (size_t j = 0 ; j < nextra_bytes; j++) {
260512
+ int c = (int) s[++i];
260513
+ /* now validate the extra bytes */
260514
+ if ((c & 0xC0) != 0x80) {
260515
+ /* extra byte is not in the format 10xxxxxx */
260522
260516
  AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260523
260517
  return UnicodeType::INVALID;
260524
260518
  }
260525
- if ((c & 0xF8) == 0xF0) {
260526
- continue;
260527
- }
260528
- AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260519
+ utf8char = (utf8char << 6) | (c & 0x3F);
260520
+ }
260521
+ if ((utf8char & mask) == 0) {
260522
+ /* invalid UTF-8 codepoint, not shortest possible */
260523
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260529
260524
  return UnicodeType::INVALID;
260530
260525
  }
260526
+ if (utf8char > 0x10FFFF) {
260527
+ /* value not representable by Unicode */
260528
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260529
+ return UnicodeType::INVALID;
260530
+ }
260531
+ if ((utf8char & 0x1FFF800) == 0xD800) {
260532
+ /* Unicode characters from U+D800 to U+DFFF are surrogate characters used by UTF-16 which are invalid in UTF-8 */
260533
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, first_pos_seq, UnicodeInvalidReason::INVALID_UNICODE);
260534
+ return UnicodeType::INVALID;
260535
+ }
260536
+ return UnicodeType::UNICODE;
260537
+ }
260538
+
260539
+ UnicodeType Utf8Proc::Analyze(const char *s, size_t len, UnicodeInvalidReason *invalid_reason, size_t *invalid_pos) {
260540
+ UnicodeType type = UnicodeType::ASCII;
260531
260541
 
260542
+ for (size_t i = 0; i < len; i++) {
260543
+ int c = (int) s[i];
260544
+
260545
+ if ((c & 0x80) == 0) {
260546
+ /* 1 byte sequence */
260547
+ if (c == '\0') {
260548
+ /* NULL byte not allowed */
260549
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::NULL_BYTE);
260550
+ return UnicodeType::INVALID;
260551
+ }
260552
+ } else {
260553
+ int first_pos_seq = i;
260554
+
260555
+ if ((c & 0xE0) == 0xC0) {
260556
+ /* 2 byte sequence */
260557
+ int utf8char = c & 0x1F;
260558
+ type = UTF8ExtraByteLoop<1, 0x000780>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260559
+ } else if ((c & 0xF0) == 0xE0) {
260560
+ /* 3 byte sequence */
260561
+ int utf8char = c & 0x0F;
260562
+ type = UTF8ExtraByteLoop<2, 0x00F800>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260563
+ } else if ((c & 0xF8) == 0xF0) {
260564
+ /* 4 byte sequence */
260565
+ int utf8char = c & 0x07;
260566
+ type = UTF8ExtraByteLoop<3, 0x1F0000>(first_pos_seq, utf8char, i, s, len, invalid_reason, invalid_pos);
260567
+ } else {
260568
+ /* invalid UTF-8 start byte */
260569
+ AssignInvalidUTF8Reason(invalid_reason, invalid_pos, i, UnicodeInvalidReason::BYTE_MISMATCH);
260570
+ return UnicodeType::INVALID;
260571
+ }
260572
+ if (type == UnicodeType::INVALID) {
260573
+ return type;
260574
+ }
260575
+ }
260576
+ }
260532
260577
  return type;
260533
260578
  }
260534
260579
 
260535
-
260536
260580
  char* Utf8Proc::Normalize(const char *s, size_t len) {
260537
260581
  assert(s);
260538
260582
  assert(Utf8Proc::Analyze(s, len) != UnicodeType::INVALID);
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "1f2e0822a"
15
- #define DUCKDB_VERSION "v0.5.1-dev225"
14
+ #define DUCKDB_SOURCE_ID "5e9609587"
15
+ #define DUCKDB_VERSION "v0.5.1-dev240"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //