duckdb 0.5.2-dev815.0 → 0.5.2-dev833.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.2-dev815.0",
4
+ "version": "0.5.2-dev833.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -29106,7 +29106,7 @@ struct RowOperations {
29106
29106
  namespace duckdb {
29107
29107
 
29108
29108
  template <class OP, class RETURN_TYPE, typename... ARGS>
29109
- RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
29109
+ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&...args) {
29110
29110
  D_ASSERT(radix_bits <= sizeof(hash_t) * 8);
29111
29111
  switch (radix_bits) {
29112
29112
  case 1:
@@ -29135,7 +29135,7 @@ RETURN_TYPE RadixBitsSwitch(idx_t radix_bits, ARGS &&... args) {
29135
29135
  }
29136
29136
 
29137
29137
  template <class OP, class RETURN_TYPE, idx_t radix_bits_1, typename... ARGS>
29138
- RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
29138
+ RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&...args) {
29139
29139
  D_ASSERT(radix_bits_2 <= sizeof(hash_t) * 8);
29140
29140
  switch (radix_bits_2) {
29141
29141
  case 1:
@@ -29164,7 +29164,7 @@ RETURN_TYPE DoubleRadixBitsSwitch2(idx_t radix_bits_2, ARGS &&... args) {
29164
29164
  }
29165
29165
 
29166
29166
  template <class OP, class RETURN_TYPE, typename... ARGS>
29167
- RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&... args) {
29167
+ RETURN_TYPE DoubleRadixBitsSwitch1(idx_t radix_bits_1, idx_t radix_bits_2, ARGS &&...args) {
29168
29168
  D_ASSERT(radix_bits_1 <= sizeof(hash_t) * 8);
29169
29169
  switch (radix_bits_1) {
29170
29170
  case 1:
@@ -49455,6 +49455,9 @@ void Vector::Initialize(bool zero_data, idx_t capacity) {
49455
49455
  memset(data, 0, capacity * type_size);
49456
49456
  }
49457
49457
  }
49458
+ if (capacity > STANDARD_VECTOR_SIZE) {
49459
+ validity.Resize(STANDARD_VECTOR_SIZE, capacity);
49460
+ }
49458
49461
  }
49459
49462
 
49460
49463
  struct DataArrays {
@@ -93661,6 +93664,13 @@ unique_ptr<FunctionData> HistogramBindFunction(ClientContext &context, Aggregate
93661
93664
  vector<unique_ptr<Expression>> &arguments) {
93662
93665
 
93663
93666
  D_ASSERT(arguments.size() == 1);
93667
+
93668
+ if (arguments[0]->return_type.id() == LogicalTypeId::LIST ||
93669
+ arguments[0]->return_type.id() == LogicalTypeId::STRUCT ||
93670
+ arguments[0]->return_type.id() == LogicalTypeId::MAP) {
93671
+ throw NotImplementedException("Unimplemented type for histogram %s", arguments[0]->return_type.ToString());
93672
+ }
93673
+
93664
93674
  child_list_t<LogicalType> struct_children;
93665
93675
  struct_children.push_back({"key", LogicalType::LIST(arguments[0]->return_type)});
93666
93676
  struct_children.push_back({"value", LogicalType::LIST(LogicalType::UBIGINT)});
@@ -95714,6 +95724,11 @@ struct VectorCastHelpers {
95714
95724
  }
95715
95725
  };
95716
95726
 
95727
+ struct VectorStringifiedListParser {
95728
+ static idx_t CountParts(const string_t &input);
95729
+ static bool SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start, Vector &child);
95730
+ };
95731
+
95717
95732
  } // namespace duckdb
95718
95733
 
95719
95734
 
@@ -96484,20 +96499,8 @@ BoundCastInfo DefaultCasts::EnumCastSwitch(BindCastInput &input, const LogicalTy
96484
96499
 
96485
96500
  namespace duckdb {
96486
96501
 
96487
- struct ListBoundCastData : public BoundCastData {
96488
- explicit ListBoundCastData(BoundCastInfo child_cast) : child_cast_info(move(child_cast)) {
96489
- }
96490
-
96491
- BoundCastInfo child_cast_info;
96492
-
96493
- public:
96494
- unique_ptr<BoundCastData> Copy() const override {
96495
- return make_unique<ListBoundCastData>(child_cast_info.Copy());
96496
- }
96497
- };
96498
-
96499
- unique_ptr<BoundCastData> BindListToListCast(BindCastInput &input, const LogicalType &source,
96500
- const LogicalType &target) {
96502
+ unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &input, const LogicalType &source,
96503
+ const LogicalType &target) {
96501
96504
  vector<BoundCastInfo> child_cast_info;
96502
96505
  auto &source_child_type = ListType::GetChildType(source);
96503
96506
  auto &result_child_type = ListType::GetChildType(target);
@@ -96608,11 +96611,11 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
96608
96611
  BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
96609
96612
  switch (target.id()) {
96610
96613
  case LogicalTypeId::LIST:
96611
- return BoundCastInfo(ListToListCast, BindListToListCast(input, source, target));
96614
+ return BoundCastInfo(ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
96612
96615
  case LogicalTypeId::VARCHAR:
96613
96616
  case LogicalTypeId::JSON:
96614
- return BoundCastInfo(ListToVarcharCast,
96615
- BindListToListCast(input, source, LogicalType::LIST(LogicalType::VARCHAR)));
96617
+ return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
96618
+ input, source, LogicalType::LIST(LogicalType::VARCHAR)));
96616
96619
  default:
96617
96620
  return DefaultCasts::TryVectorNullCast;
96618
96621
  }
@@ -96950,9 +96953,97 @@ static BoundCastInfo VectorStringCastNumericSwitch(BindCastInput &input, const L
96950
96953
  }
96951
96954
  }
96952
96955
 
96956
+ bool StringListCastLoop(string_t *source_data, ValidityMask &source_mask, Vector &result, ValidityMask &result_mask,
96957
+ idx_t count, CastParameters &parameters, const SelectionVector *sel) {
96958
+
96959
+ idx_t total_list_size = 0;
96960
+ for (idx_t i = 0; i < count; i++) {
96961
+ idx_t idx = i;
96962
+ if (sel) {
96963
+ idx = sel->get_index(i);
96964
+ }
96965
+ if (!source_mask.RowIsValid(idx)) {
96966
+ continue;
96967
+ }
96968
+ total_list_size += VectorStringifiedListParser::CountParts(source_data[idx]);
96969
+ }
96970
+
96971
+ Vector varchar_vector(LogicalType::VARCHAR, total_list_size);
96972
+
96973
+ ListVector::Reserve(result, total_list_size);
96974
+ ListVector::SetListSize(result, total_list_size);
96975
+
96976
+ auto list_data = ListVector::GetData(result);
96977
+ auto child_data = FlatVector::GetData<string_t>(varchar_vector);
96978
+
96979
+ bool all_converted = true;
96980
+ idx_t total = 0;
96981
+ for (idx_t i = 0; i < count; i++) {
96982
+ idx_t idx = i;
96983
+ if (sel) {
96984
+ idx = sel->get_index(i);
96985
+ }
96986
+ if (!source_mask.RowIsValid(idx)) {
96987
+ result_mask.SetInvalid(i);
96988
+ continue;
96989
+ }
96990
+
96991
+ list_data[i].offset = total;
96992
+ auto valid =
96993
+ VectorStringifiedListParser::SplitStringifiedList(source_data[idx], child_data, total, varchar_vector);
96994
+ if (!valid) {
96995
+ string text = "Type VARCHAR with value '" + source_data[idx].GetString() +
96996
+ "' can't be cast to the destination type LIST";
96997
+ HandleVectorCastError::Operation<string_t>(text, result_mask, idx, parameters.error_message, all_converted);
96998
+ }
96999
+ list_data[i].length = total - list_data[i].offset; // length is the amount of parts coming from this string
97000
+ }
97001
+ D_ASSERT(total_list_size == total);
97002
+
97003
+ auto &result_child = ListVector::GetEntry(result);
97004
+ auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
97005
+ CastParameters child_parameters(parameters, cast_data.child_cast_info.cast_data.get());
97006
+ return cast_data.child_cast_info.function(varchar_vector, result_child, total_list_size, child_parameters) &&
97007
+ all_converted;
97008
+ }
97009
+
97010
+ bool StringListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
97011
+ D_ASSERT(source.GetType().id() == LogicalTypeId::VARCHAR);
97012
+ D_ASSERT(result.GetType().id() == LogicalTypeId::LIST);
97013
+
97014
+ switch (source.GetVectorType()) {
97015
+ case VectorType::CONSTANT_VECTOR: {
97016
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
97017
+
97018
+ auto source_data = ConstantVector::GetData<string_t>(source);
97019
+ auto &source_mask = ConstantVector::Validity(source);
97020
+ auto &result_mask = ConstantVector::Validity(result);
97021
+
97022
+ return StringListCastLoop(source_data, source_mask, result, result_mask, 1, parameters, nullptr);
97023
+ }
97024
+ default: {
97025
+ UnifiedVectorFormat unified_source;
97026
+ result.SetVectorType(VectorType::FLAT_VECTOR);
97027
+
97028
+ source.ToUnifiedFormat(count, unified_source);
97029
+ auto source_sel = unified_source.sel;
97030
+ auto source_data = (string_t *)unified_source.data;
97031
+ auto &source_mask = unified_source.validity;
97032
+ auto &result_mask = FlatVector::Validity(result);
97033
+
97034
+ return StringListCastLoop(source_data, source_mask, result, result_mask, count, parameters, source_sel);
97035
+ }
97036
+ }
97037
+ }
97038
+
97039
+ BoundCastInfo StringToListCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
97040
+ // second argument allows for a secondary casting function to be passed in the CastParameters
97041
+ return BoundCastInfo(&StringListCast,
97042
+ ListBoundCastData::BindListToListCast(input, LogicalType::LIST(LogicalType::VARCHAR), target));
97043
+ }
97044
+
96953
97045
  BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const LogicalType &source,
96954
97046
  const LogicalType &target) {
96955
- // now switch on the target type
96956
97047
  switch (target.id()) {
96957
97048
  case LogicalTypeId::DATE:
96958
97049
  return BoundCastInfo(&VectorCastHelpers::TryCastErrorLoop<string_t, date_t, duckdb::TryCastErrorMessage>);
@@ -96980,6 +97071,8 @@ BoundCastInfo DefaultCasts::StringCastSwitch(BindCastInput &input, const Logical
96980
97071
  case LogicalTypeId::VARCHAR:
96981
97072
  case LogicalTypeId::JSON:
96982
97073
  return &DefaultCasts::ReinterpretCast;
97074
+ case LogicalTypeId::LIST:
97075
+ return StringToListCast(input, source, target);
96983
97076
  default:
96984
97077
  return VectorStringCastNumericSwitch(input, source, target);
96985
97078
  }
@@ -97341,6 +97434,144 @@ BoundCastInfo DefaultCasts::UUIDCastSwitch(BindCastInput &input, const LogicalTy
97341
97434
  } // namespace duckdb
97342
97435
 
97343
97436
 
97437
+ namespace duckdb {
97438
+
97439
+ struct CountPartOperation {
97440
+ idx_t count = 0;
97441
+
97442
+ void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
97443
+ count++;
97444
+ }
97445
+ };
97446
+
97447
+ struct SplitStringOperation {
97448
+ SplitStringOperation(string_t *child_data, idx_t &child_start, Vector &child)
97449
+ : child_data(child_data), child_start(child_start), child(child) {
97450
+ }
97451
+
97452
+ string_t *child_data;
97453
+ idx_t &child_start;
97454
+ Vector &child;
97455
+
97456
+ void HandleValue(const char *buf, idx_t start_pos, idx_t pos) {
97457
+
97458
+ if ((pos - start_pos) >= 4 && buf[start_pos] == 'N' && buf[start_pos + 1] == 'U' && buf[start_pos + 2] == 'L' &&
97459
+ buf[start_pos + 3] == 'L') {
97460
+ FlatVector::SetNull(child, child_start, true);
97461
+ child_start++;
97462
+ return;
97463
+ }
97464
+ child_data[child_start] = StringVector::AddString(child, buf + start_pos, pos - start_pos);
97465
+ child_start++;
97466
+ }
97467
+ };
97468
+
97469
+ static bool SkipToCloseQuotes(idx_t &pos, const char *buf, idx_t &len) {
97470
+ char quote = buf[pos];
97471
+ pos++;
97472
+
97473
+ while (pos < len) {
97474
+ if (buf[pos] == quote) {
97475
+ return true;
97476
+ }
97477
+ pos++;
97478
+ }
97479
+ return false;
97480
+ }
97481
+
97482
+ static bool SkipToClose(idx_t &idx, const char *buf, idx_t &len, idx_t &lvl) {
97483
+ while (idx < len) {
97484
+ if (buf[idx] == '[') {
97485
+ if (!SkipToClose(++idx, buf, len, lvl)) {
97486
+ return false;
97487
+ }
97488
+ lvl++;
97489
+ idx++;
97490
+ }
97491
+ if (buf[idx] == '"' || buf[idx] == '\'') {
97492
+ SkipToCloseQuotes(idx, buf, len);
97493
+ }
97494
+ if (buf[idx] == ']') {
97495
+ lvl--;
97496
+ return true;
97497
+ }
97498
+ idx++;
97499
+ }
97500
+ return false;
97501
+ }
97502
+
97503
+ template <class OP>
97504
+ static bool SplitStringifiedListInternal(const string_t &input, OP &state) {
97505
+ const char *buf = input.GetDataUnsafe();
97506
+ idx_t len = input.GetSize();
97507
+ idx_t lvl = 1;
97508
+ idx_t pos = 0;
97509
+
97510
+ while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
97511
+ pos++;
97512
+ }
97513
+ if (pos == len || buf[pos] != '[') {
97514
+ return false;
97515
+ }
97516
+ pos++;
97517
+ while (pos < len && StringUtil::CharacterIsSpace(buf[pos])) {
97518
+ pos++;
97519
+ }
97520
+
97521
+ idx_t start_pos = pos;
97522
+ while (pos < len) {
97523
+ if (buf[pos] == '[') {
97524
+ if (!SkipToClose(++pos, buf, len, ++lvl)) {
97525
+ return false;
97526
+ }
97527
+ } else if (buf[pos] == '"' || buf[pos] == '\'') {
97528
+ SkipToCloseQuotes(pos, buf, len);
97529
+ } else if (buf[pos] == ',' || buf[pos] == ']') {
97530
+ idx_t trailing_whitespace = 0;
97531
+ while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
97532
+ trailing_whitespace++;
97533
+ }
97534
+ if (!(buf[pos] == ']' && start_pos == (pos))) {
97535
+ state.HandleValue(buf, start_pos, pos - trailing_whitespace);
97536
+ } // else the list is empty
97537
+ if (buf[pos] == ']') {
97538
+ lvl--;
97539
+ break;
97540
+ }
97541
+ while (pos + 1 < len && StringUtil::CharacterIsSpace(buf[pos + 1])) {
97542
+ pos++;
97543
+ }
97544
+ start_pos = pos + 1;
97545
+ }
97546
+ pos++;
97547
+ }
97548
+ pos++;
97549
+ while (pos < len) {
97550
+ if (!StringUtil::CharacterIsSpace(buf[pos])) {
97551
+ return false;
97552
+ }
97553
+ pos++;
97554
+ }
97555
+ if (lvl != 0) {
97556
+ return false;
97557
+ }
97558
+ return true;
97559
+ }
97560
+
97561
+ bool VectorStringifiedListParser::SplitStringifiedList(const string_t &input, string_t *child_data, idx_t &child_start,
97562
+ Vector &child) {
97563
+ SplitStringOperation state(child_data, child_start, child);
97564
+ return SplitStringifiedListInternal<SplitStringOperation>(input, state);
97565
+ }
97566
+
97567
+ idx_t VectorStringifiedListParser::CountParts(const string_t &input) {
97568
+ CountPartOperation state;
97569
+ SplitStringifiedListInternal<CountPartOperation>(input, state);
97570
+ return state.count;
97571
+ }
97572
+ } // namespace duckdb
97573
+
97574
+
97344
97575
  namespace duckdb {
97345
97576
 
97346
97577
  //! The target type determines the preferred implicit casts