duckdb 0.5.0 → 0.5.1-dev101.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/duckdb.cpp CHANGED
@@ -620,7 +620,88 @@ public:
620
620
 
621
621
  } // namespace duckdb
622
622
 
623
+ //===----------------------------------------------------------------------===//
624
+ // DuckDB
625
+ //
626
+ // extension_functions.hpp
627
+ //
628
+ //
629
+ //===----------------------------------------------------------------------===//
630
+
631
+
632
+
623
633
 
634
+
635
namespace duckdb {

//! One row of the extension lookup table: the name of a function and the name of the extension
//! that provides it. Both names are stored inline as fixed-size, NUL-terminated character buffers.
struct ExtensionFunction {
	char function[48];
	char extension[48];
};

//! Functions that are provided by extensions, paired with the extension that provides them.
//! NOTE: the rows are listed in ascending byte order of `function`. FindExtension searches this table
//! with std::lower_bound, so any new row has to be inserted at its sorted position.
static constexpr ExtensionFunction EXTENSION_FUNCTIONS[] = {
    {"->>", "json"},
    {"array_to_json", "json"},
    {"create_fts_index", "fts"},
    {"dbgen", "tpch"},
    {"drop_fts_index", "fts"},
    {"dsdgen", "tpcds"},
    {"excel_text", "excel"},
    {"from_json", "json"},
    {"from_json_strict", "json"},
    {"from_substrait", "substrait"},
    {"get_substrait", "substrait"},
    {"get_substrait_json", "substrait"},
    {"icu_calendar_names", "icu"},
    {"icu_sort_key", "icu"},
    {"json", "json"},
    {"json_array", "json"},
    {"json_array_length", "json"},
    {"json_extract", "json"},
    {"json_extract_path", "json"},
    {"json_extract_path_text", "json"},
    {"json_extract_string", "json"},
    {"json_group_array", "json"},
    {"json_group_object", "json"},
    {"json_group_structure", "json"},
    {"json_merge_patch", "json"},
    {"json_object", "json"},
    {"json_quote", "json"},
    {"json_structure", "json"},
    {"json_transform", "json"},
    {"json_transform_strict", "json"},
    {"json_type", "json"},
    {"json_valid", "json"},
    {"make_timestamptz", "icu"},
    {"parquet_metadata", "parquet"},
    {"parquet_scan", "parquet"},
    {"parquet_schema", "parquet"},
    {"pg_timezone_names", "icu"},
    {"postgres_attach", "postgres_scanner"},
    {"postgres_scan", "postgres_scanner"},
    {"postgres_scan_pushdown", "postgres_scanner"},
    {"read_json_objects", "json"},
    {"read_ndjson_objects", "json"},
    {"read_parquet", "parquet"},
    {"row_to_json", "json"},
    {"sqlite_attach", "sqlite_scanner"},
    {"sqlite_scan", "sqlite_scanner"},
    {"stem", "fts"},
    {"text", "excel"},
    {"to_json", "json"},
    {"tpcds", "tpcds"},
    {"tpcds_answers", "tpcds"},
    {"tpcds_queries", "tpcds"},
    {"tpch", "tpch"},
    {"tpch_answers", "tpch"},
    {"tpch_queries", "tpch"},
    {"visualize_diff_profiling_output", "visualizer"},
    {"visualize_json_profiling_output", "visualizer"},
    {"visualize_last_profiling_output", "visualizer"},
};
} // namespace duckdb
703
+
704
+ #include <algorithm>
624
705
  namespace duckdb {
625
706
 
626
707
  string SimilarCatalogEntry::GetQualifiedName() const {
@@ -823,6 +904,16 @@ SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const
823
904
  return {most_similar.first, most_similar.second, schema_of_most_similar};
824
905
  }
825
906
 
907
+ string FindExtension(const string &function_name) {
908
+ auto size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionFunction);
909
+ auto it = std::lower_bound(
910
+ EXTENSION_FUNCTIONS, EXTENSION_FUNCTIONS + size, function_name,
911
+ [](const ExtensionFunction &element, const string &value) { return element.function < value; });
912
+ if (it != EXTENSION_FUNCTIONS + size && it->function == function_name) {
913
+ return it->extension;
914
+ }
915
+ return "";
916
+ }
826
917
  CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name,
827
918
  CatalogType type, const vector<SchemaCatalogEntry *> &schemas,
828
919
  QueryErrorContext error_context) {
@@ -836,7 +927,12 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
836
927
  }
837
928
  });
838
929
  auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
839
-
930
+ auto extension_name = FindExtension(entry_name);
931
+ if (!extension_name.empty()) {
932
+ return CatalogException("Function with name %s is not on the catalog, but it exists in the %s extension. To "
933
+ "Install and Load the extension, run: INSTALL %s; LOAD %s;",
934
+ entry_name, extension_name, extension_name, extension_name);
935
+ }
840
936
  string did_you_mean;
841
937
  if (unseen_entry.Found() && unseen_entry.distance < entry.distance) {
842
938
  did_you_mean = "\nDid you mean \"" + unseen_entry.GetQualifiedName() + "\"?";
@@ -33336,6 +33432,9 @@ void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr
33336
33432
 
33337
33433
  static inline void VerifyUnswizzledString(const RowLayout &layout, const idx_t &col_idx, const data_ptr_t &row_ptr) {
33338
33434
  #ifdef DEBUG
33435
+ if (layout.GetTypes()[col_idx] == LogicalTypeId::BLOB) {
33436
+ return;
33437
+ }
33339
33438
  idx_t entry_idx;
33340
33439
  idx_t idx_in_entry;
33341
33440
  ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
@@ -35673,7 +35772,10 @@ struct SortConstants {
35673
35772
 
35674
35773
  struct SortLayout {
35675
35774
  public:
35775
+ SortLayout() {
35776
+ }
35676
35777
  explicit SortLayout(const vector<BoundOrderByNode> &orders);
35778
+ SortLayout GetPrefixComparisonLayout(idx_t num_prefix_cols) const;
35677
35779
 
35678
35780
  public:
35679
35781
  idx_t column_count;
@@ -37324,6 +37426,32 @@ SortLayout::SortLayout(const vector<BoundOrderByNode> &orders)
37324
37426
  blob_layout.Initialize(blob_layout_types);
37325
37427
  }
37326
37428
 
37429
+ SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const {
37430
+ SortLayout result;
37431
+ result.column_count = num_prefix_cols;
37432
+ result.all_constant = true;
37433
+ result.comparison_size = 0;
37434
+ for (idx_t col_idx = 0; col_idx < num_prefix_cols; col_idx++) {
37435
+ result.order_types.push_back(order_types[col_idx]);
37436
+ result.order_by_null_types.push_back(order_by_null_types[col_idx]);
37437
+ result.logical_types.push_back(logical_types[col_idx]);
37438
+
37439
+ result.all_constant = result.all_constant && constant_size[col_idx];
37440
+ result.constant_size.push_back(constant_size[col_idx]);
37441
+
37442
+ result.comparison_size += column_sizes[col_idx];
37443
+ result.column_sizes.push_back(column_sizes[col_idx]);
37444
+
37445
+ result.prefix_lengths.push_back(prefix_lengths[col_idx]);
37446
+ result.stats.push_back(stats[col_idx]);
37447
+ result.has_null.push_back(has_null[col_idx]);
37448
+ }
37449
+ result.entry_size = entry_size;
37450
+ result.blob_layout = blob_layout;
37451
+ result.sorting_to_blob_col = sorting_to_blob_col;
37452
+ return result;
37453
+ }
37454
+
37327
37455
  LocalSortState::LocalSortState() : initialized(false) {
37328
37456
  }
37329
37457
 
@@ -47573,11 +47701,36 @@ Value Value::CreateValue(dtime_t value) {
47573
47701
  return Value::TIME(value);
47574
47702
  }
47575
47703
 
47704
+ template <>
47705
+ Value Value::CreateValue(dtime_tz_t value) {
47706
+ return Value::TIMETZ(value);
47707
+ }
47708
+
47576
47709
  template <>
47577
47710
  Value Value::CreateValue(timestamp_t value) {
47578
47711
  return Value::TIMESTAMP(value);
47579
47712
  }
47580
47713
 
47714
+ template <>
47715
+ Value Value::CreateValue(timestamp_sec_t value) {
47716
+ return Value::TIMESTAMPSEC(value);
47717
+ }
47718
+
47719
+ template <>
47720
+ Value Value::CreateValue(timestamp_ms_t value) {
47721
+ return Value::TIMESTAMPMS(value);
47722
+ }
47723
+
47724
+ template <>
47725
+ Value Value::CreateValue(timestamp_ns_t value) {
47726
+ return Value::TIMESTAMPNS(value);
47727
+ }
47728
+
47729
+ template <>
47730
+ Value Value::CreateValue(timestamp_tz_t value) {
47731
+ return Value::TIMESTAMPTZ(value);
47732
+ }
47733
+
47581
47734
  template <>
47582
47735
  Value Value::CreateValue(const char *value) {
47583
47736
  return Value(string(value));
@@ -49150,19 +49303,6 @@ void Vector::Resize(idx_t cur_size, idx_t new_size) {
49150
49303
  }
49151
49304
  }
49152
49305
 
49153
- // FIXME Just like DECIMAL, it's important that type_info gets considered when determining whether or not to cast
49154
- // just comparing internal type is not always enough
49155
- static bool ValueShouldBeCast(const LogicalType &incoming, const LogicalType &target) {
49156
- if (incoming.InternalType() != target.InternalType()) {
49157
- return true;
49158
- }
49159
- if (incoming.id() == LogicalTypeId::DECIMAL && incoming.id() == target.id()) {
49160
- //! Compare the type_info
49161
- return incoming != target;
49162
- }
49163
- return false;
49164
- }
49165
-
49166
49306
  void Vector::SetValue(idx_t index, const Value &val) {
49167
49307
  if (GetVectorType() == VectorType::DICTIONARY_VECTOR) {
49168
49308
  // dictionary: apply dictionary and forward to child
@@ -49170,10 +49310,11 @@ void Vector::SetValue(idx_t index, const Value &val) {
49170
49310
  auto &child = DictionaryVector::Child(*this);
49171
49311
  return child.SetValue(sel_vector.get_index(index), val);
49172
49312
  }
49173
- if (ValueShouldBeCast(val.type(), GetType())) {
49313
+ if (val.type() != GetType()) {
49174
49314
  SetValue(index, val.CastAs(GetType()));
49175
49315
  return;
49176
49316
  }
49317
+ D_ASSERT(val.type().InternalType() == GetType().InternalType());
49177
49318
 
49178
49319
  validity.EnsureWritable();
49179
49320
  validity.Set(index, !val.IsNull());
@@ -49424,7 +49565,10 @@ Value Vector::GetValue(const Vector &v_p, idx_t index_p) {
49424
49565
  auto value = GetValueInternal(v_p, index_p);
49425
49566
  // set the alias of the type to the correct value, if there is a type alias
49426
49567
  if (v_p.GetType().HasAlias()) {
49427
- value.type().SetAlias(v_p.GetType().GetAlias());
49568
+ value.type().CopyAuxInfo(v_p.GetType());
49569
+ }
49570
+ if (v_p.GetType().id() != LogicalTypeId::AGGREGATE_STATE && value.type().id() != LogicalTypeId::AGGREGATE_STATE) {
49571
+ D_ASSERT(v_p.GetType() == value.type());
49428
49572
  }
49429
49573
  return value;
49430
49574
  }
@@ -51491,6 +51635,7 @@ public:
51491
51635
  if (!alias.empty()) {
51492
51636
  return false;
51493
51637
  }
51638
+ //! We only need to compare aliases when both types have them in this case
51494
51639
  return true;
51495
51640
  }
51496
51641
  if (alias != other_p->alias) {
@@ -51504,8 +51649,7 @@ public:
51504
51649
  if (type != other_p->type) {
51505
51650
  return false;
51506
51651
  }
51507
- auto &other = (ExtraTypeInfo &)*other_p;
51508
- return alias == other.alias && EqualsInternal(other_p);
51652
+ return alias == other_p->alias && EqualsInternal(other_p);
51509
51653
  }
51510
51654
  //! Serializes a ExtraTypeInfo to a stand-alone binary blob
51511
51655
  virtual void Serialize(FieldWriter &writer) const {};
@@ -52184,10 +52328,7 @@ LogicalType LogicalType::Deserialize(Deserializer &source) {
52184
52328
  return LogicalType(id, move(info));
52185
52329
  }
52186
52330
 
52187
- bool LogicalType::operator==(const LogicalType &rhs) const {
52188
- if (id_ != rhs.id_) {
52189
- return false;
52190
- }
52331
+ bool LogicalType::EqualTypeInfo(const LogicalType &rhs) const {
52191
52332
  if (type_info_.get() == rhs.type_info_.get()) {
52192
52333
  return true;
52193
52334
  }
@@ -52199,6 +52340,13 @@ bool LogicalType::operator==(const LogicalType &rhs) const {
52199
52340
  }
52200
52341
  }
52201
52342
 
52343
+ bool LogicalType::operator==(const LogicalType &rhs) const {
52344
+ if (id_ != rhs.id_) {
52345
+ return false;
52346
+ }
52347
+ return EqualTypeInfo(rhs);
52348
+ }
52349
+
52202
52350
  } // namespace duckdb
52203
52351
 
52204
52352
 
@@ -64814,6 +64962,7 @@ public:
64814
64962
 
64815
64963
 
64816
64964
 
64965
+
64817
64966
  //===----------------------------------------------------------------------===//
64818
64967
  // DuckDB
64819
64968
  //
@@ -64909,7 +65058,6 @@ private:
64909
65058
 
64910
65059
 
64911
65060
 
64912
-
64913
65061
  #include <algorithm>
64914
65062
  #include <cmath>
64915
65063
  #include <numeric>
@@ -64927,12 +65075,14 @@ public:
64927
65075
 
64928
65076
  WindowGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
64929
65077
  const Types &payload_types, idx_t max_mem, bool external)
64930
- : memory_per_thread(max_mem), count(0), partition_layout(partitions) {
65078
+ : memory_per_thread(max_mem), count(0) {
64931
65079
 
64932
65080
  RowLayout payload_layout;
64933
65081
  payload_layout.Initialize(payload_types);
64934
65082
  global_sort = make_unique<GlobalSortState>(buffer_manager, orders, payload_layout);
64935
65083
  global_sort->external = external;
65084
+
65085
+ partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
64936
65086
  }
64937
65087
 
64938
65088
  void Combine(LocalSortState &local_sort) {
@@ -65605,7 +65755,10 @@ struct WindowInputExpression {
65605
65755
 
65606
65756
  inline bool CellIsNull(idx_t i) const {
65607
65757
  D_ASSERT(!chunk.data.empty());
65608
- return FlatVector::IsNull(chunk.data[0], scalar ? 0 : i);
65758
+ if (chunk.data[0].GetVectorType() == VectorType::CONSTANT_VECTOR) {
65759
+ return ConstantVector::IsNull(chunk.data[0]);
65760
+ }
65761
+ return FlatVector::IsNull(chunk.data[0], i);
65609
65762
  }
65610
65763
 
65611
65764
  inline void CopyCell(Vector &target, idx_t target_offset) const {
@@ -93278,21 +93431,21 @@ AggregateFunction GetHistogramFunction(const LogicalType &type) {
93278
93431
  case LogicalType::VARCHAR:
93279
93432
  return GetMapType<HistogramStringFunctor, string, IS_ORDERED>(type);
93280
93433
  case LogicalType::TIMESTAMP:
93281
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93434
+ return GetMapType<HistogramFunctor, timestamp_t, IS_ORDERED>(type);
93282
93435
  case LogicalType::TIMESTAMP_TZ:
93283
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93436
+ return GetMapType<HistogramFunctor, timestamp_tz_t, IS_ORDERED>(type);
93284
93437
  case LogicalType::TIMESTAMP_S:
93285
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93438
+ return GetMapType<HistogramFunctor, timestamp_sec_t, IS_ORDERED>(type);
93286
93439
  case LogicalType::TIMESTAMP_MS:
93287
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93440
+ return GetMapType<HistogramFunctor, timestamp_ms_t, IS_ORDERED>(type);
93288
93441
  case LogicalType::TIMESTAMP_NS:
93289
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93442
+ return GetMapType<HistogramFunctor, timestamp_ns_t, IS_ORDERED>(type);
93290
93443
  case LogicalType::TIME:
93291
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93444
+ return GetMapType<HistogramFunctor, dtime_t, IS_ORDERED>(type);
93292
93445
  case LogicalType::TIME_TZ:
93293
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93446
+ return GetMapType<HistogramFunctor, dtime_tz_t, IS_ORDERED>(type);
93294
93447
  case LogicalType::DATE:
93295
- return GetMapType<HistogramFunctor, int32_t, IS_ORDERED>(type);
93448
+ return GetMapType<HistogramFunctor, date_t, IS_ORDERED>(type);
93296
93449
  default:
93297
93450
  throw InternalException("Unimplemented histogram aggregate");
93298
93451
  }
@@ -96856,7 +97009,8 @@ struct DateDiff {
96856
97009
  struct WeekOperator {
96857
97010
  template <class TA, class TB, class TR>
96858
97011
  static inline TR Operation(TA startdate, TB enddate) {
96859
- return Date::Epoch(enddate) / Interval::SECS_PER_WEEK - Date::Epoch(startdate) / Interval::SECS_PER_WEEK;
97012
+ return Date::Epoch(Date::GetMondayOfCurrentWeek(enddate)) / Interval::SECS_PER_WEEK -
97013
+ Date::Epoch(Date::GetMondayOfCurrentWeek(startdate)) / Interval::SECS_PER_WEEK;
96860
97014
  }
96861
97015
  };
96862
97016
 
@@ -103240,12 +103394,49 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
103240
103394
  result, state_vector.state_vector, count);
103241
103395
  break;
103242
103396
  case PhysicalType::INT32:
103243
- FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
103244
- result, state_vector.state_vector, count);
103397
+ if (key_type.id() == LogicalTypeId::DATE) {
103398
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, date_t>(
103399
+ result, state_vector.state_vector, count);
103400
+ } else {
103401
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
103402
+ result, state_vector.state_vector, count);
103403
+ }
103245
103404
  break;
103246
103405
  case PhysicalType::INT64:
103247
- FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
103248
- result, state_vector.state_vector, count);
103406
+ switch (key_type.id()) {
103407
+ case LogicalTypeId::TIME:
103408
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_t>(
103409
+ result, state_vector.state_vector, count);
103410
+ break;
103411
+ case LogicalTypeId::TIME_TZ:
103412
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_tz_t>(
103413
+ result, state_vector.state_vector, count);
103414
+ break;
103415
+ case LogicalTypeId::TIMESTAMP:
103416
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_t>(
103417
+ result, state_vector.state_vector, count);
103418
+ break;
103419
+ case LogicalTypeId::TIMESTAMP_MS:
103420
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ms_t>(
103421
+ result, state_vector.state_vector, count);
103422
+ break;
103423
+ case LogicalTypeId::TIMESTAMP_NS:
103424
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ns_t>(
103425
+ result, state_vector.state_vector, count);
103426
+ break;
103427
+ case LogicalTypeId::TIMESTAMP_SEC:
103428
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_sec_t>(
103429
+ result, state_vector.state_vector, count);
103430
+ break;
103431
+ case LogicalTypeId::TIMESTAMP_TZ:
103432
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_tz_t>(
103433
+ result, state_vector.state_vector, count);
103434
+ break;
103435
+ default:
103436
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
103437
+ result, state_vector.state_vector, count);
103438
+ break;
103439
+ }
103249
103440
  break;
103250
103441
  case PhysicalType::FLOAT:
103251
103442
  FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, float>(
@@ -104315,18 +104506,12 @@ void SinkDataChunk(Vector *child_vector, SelectionVector &sel, idx_t offset_list
104315
104506
  static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &result) {
104316
104507
  D_ASSERT(args.ColumnCount() >= 1 && args.ColumnCount() <= 3);
104317
104508
  auto count = args.size();
104318
- Vector &lists = args.data[0];
104509
+ Vector &input_lists = args.data[0];
104319
104510
 
104320
104511
  result.SetVectorType(VectorType::FLAT_VECTOR);
104321
104512
  auto &result_validity = FlatVector::Validity(result);
104322
104513
 
104323
- for (auto &v : args.data) {
104324
- if (v.GetVectorType() != VectorType::FLAT_VECTOR && v.GetVectorType() != VectorType::CONSTANT_VECTOR) {
104325
- v.Flatten(count);
104326
- }
104327
- }
104328
-
104329
- if (lists.GetType().id() == LogicalTypeId::SQLNULL) {
104514
+ if (input_lists.GetType().id() == LogicalTypeId::SQLNULL) {
104330
104515
  result_validity.SetInvalid(0);
104331
104516
  return;
104332
104517
  }
@@ -104341,15 +104526,18 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104341
104526
  LocalSortState local_sort_state;
104342
104527
  local_sort_state.Initialize(global_sort_state, buffer_manager);
104343
104528
 
104529
+ // this ensures that we do not change the order of the entries in the input chunk
104530
+ VectorOperations::Copy(input_lists, result, count, 0, 0);
104531
+
104344
104532
  // get the child vector
104345
- auto lists_size = ListVector::GetListSize(lists);
104346
- auto &child_vector = ListVector::GetEntry(lists);
104533
+ auto lists_size = ListVector::GetListSize(result);
104534
+ auto &child_vector = ListVector::GetEntry(result);
104347
104535
  UnifiedVectorFormat child_data;
104348
104536
  child_vector.ToUnifiedFormat(lists_size, child_data);
104349
104537
 
104350
104538
  // get the lists data
104351
104539
  UnifiedVectorFormat lists_data;
104352
- lists.ToUnifiedFormat(count, lists_data);
104540
+ result.ToUnifiedFormat(count, lists_data);
104353
104541
  auto list_entries = (list_entry_t *)lists_data.data;
104354
104542
 
104355
104543
  // create the lists_indices vector, this contains an element for each list's entry,
@@ -104446,8 +104634,6 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104446
104634
  child_vector.Flatten(sel_sorted_idx);
104447
104635
  }
104448
104636
 
104449
- result.Reference(lists);
104450
-
104451
104637
  if (args.AllConstant()) {
104452
104638
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
104453
104639
  }
@@ -141399,7 +141585,7 @@ Value ForceCompressionSetting::GetSetting(ClientContext &context) {
141399
141585
  //===--------------------------------------------------------------------===//
141400
141586
  void HomeDirectorySetting::SetLocal(ClientContext &context, const Value &input) {
141401
141587
  auto &config = ClientConfig::GetConfig(context);
141402
- config.home_directory = input.IsNull() ? input.ToString() : string();
141588
+ config.home_directory = input.IsNull() ? string() : input.ToString();
141403
141589
  }
141404
141590
 
141405
141591
  Value HomeDirectorySetting::GetSetting(ClientContext &context) {
@@ -151922,6 +152108,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalGet
151922
152108
 
151923
152109
 
151924
152110
 
152111
+
151925
152112
  namespace duckdb {
151926
152113
 
151927
152114
  void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, unique_ptr<LogicalOperator> *node_ptr) {
@@ -151951,10 +152138,15 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
151951
152138
  // semi or inner join on false; entire node can be pruned
151952
152139
  ReplaceWithEmptyResult(*node_ptr);
151953
152140
  return;
151954
- case JoinType::ANTI:
151955
- // anti join: replace entire join with LHS
151956
- *node_ptr = move(join.children[0]);
152141
+ case JoinType::ANTI: {
152142
+ // when the right child has data, return the left child
152143
+ // when the right child has no data, return an empty set
152144
+ auto limit = make_unique<LogicalLimit>(1, 0, nullptr, nullptr);
152145
+ limit->AddChild(move(join.children[1]));
152146
+ auto cross_product = LogicalCrossProduct::Create(move(join.children[0]), move(limit));
152147
+ *node_ptr = move(cross_product);
151957
152148
  return;
152149
+ }
151958
152150
  case JoinType::LEFT:
151959
152151
  // anti/left outer join: replace right side with empty node
151960
152152
  ReplaceWithEmptyResult(join.children[1]);
@@ -151982,10 +152174,15 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
151982
152174
  } else {
151983
152175
  // this is the only condition and it is always true: all conditions are true
151984
152176
  switch (join.join_type) {
151985
- case JoinType::SEMI:
151986
- // semi join on true: replace entire join with LHS
151987
- *node_ptr = move(join.children[0]);
152177
+ case JoinType::SEMI: {
152178
+ // when the right child has data, return the left child
152179
+ // when the right child has no data, return an empty set
152180
+ auto limit = make_unique<LogicalLimit>(1, 0, nullptr, nullptr);
152181
+ limit->AddChild(move(join.children[1]));
152182
+ auto cross_product = LogicalCrossProduct::Create(move(join.children[0]), move(limit));
152183
+ *node_ptr = move(cross_product);
151988
152184
  return;
152185
+ }
151989
152186
  case JoinType::INNER:
151990
152187
  case JoinType::LEFT:
151991
152188
  case JoinType::RIGHT:
@@ -152102,6 +152299,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalJoin
152102
152299
  // then propagate into the join conditions
152103
152300
  switch (join.type) {
152104
152301
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
152302
+ case LogicalOperatorType::LOGICAL_DELIM_JOIN:
152105
152303
  PropagateStatistics((LogicalComparisonJoin &)join, node_ptr);
152106
152304
  break;
152107
152305
  case LogicalOperatorType::LOGICAL_ANY_JOIN:
@@ -171345,7 +171543,7 @@ void Transformer::TransformCTE(duckdb_libpgquery::PGWithClause *de_with_clause,
171345
171543
  }
171346
171544
  // we need a query
171347
171545
  if (!cte->ctequery || cte->ctequery->type != duckdb_libpgquery::T_PGSelectStmt) {
171348
- throw InternalException("A CTE needs a SELECT");
171546
+ throw NotImplementedException("A CTE needs a SELECT");
171349
171547
  }
171350
171548
 
171351
171549
  // CTE transformation can either result in inlining for non recursive CTEs, or in recursive CTE bindings
@@ -171762,7 +171960,7 @@ LogicalType Transformer::TransformTypeName(duckdb_libpgquery::PGTypeName *type_n
171762
171960
 
171763
171961
  result_type = LogicalType::MAP(move(children));
171764
171962
  } else {
171765
- int8_t width, scale;
171963
+ int64_t width, scale;
171766
171964
  if (base_type == LogicalTypeId::DECIMAL) {
171767
171965
  // default decimal width/scale
171768
171966
  width = 18;
@@ -180860,6 +181058,9 @@ BoundStatement Binder::BindSummarize(ShowStatement &stmt) {
180860
181058
 
180861
181059
 
180862
181060
 
181061
+
181062
+
181063
+
180863
181064
  //===----------------------------------------------------------------------===//
180864
181065
  // DuckDB
180865
181066
  //
@@ -180894,10 +181095,6 @@ protected:
180894
181095
 
180895
181096
 
180896
181097
 
180897
-
180898
-
180899
-
180900
-
180901
181098
  //===----------------------------------------------------------------------===//
180902
181099
  // DuckDB
180903
181100
  //
@@ -180930,6 +181127,8 @@ public:
180930
181127
  };
180931
181128
  } // namespace duckdb
180932
181129
 
181130
+
181131
+
180933
181132
  #include <algorithm>
180934
181133
 
180935
181134
  namespace duckdb {
@@ -181100,10 +181299,10 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) {
181100
181299
  if (column.Generated()) {
181101
181300
  throw BinderException("Cant update column \"%s\" because it is a generated column!", column.Name());
181102
181301
  }
181103
- if (std::find(update->columns.begin(), update->columns.end(), column.Oid()) != update->columns.end()) {
181302
+ if (std::find(update->columns.begin(), update->columns.end(), column.StorageOid()) != update->columns.end()) {
181104
181303
  throw BinderException("Multiple assignments to same column \"%s\"", colname);
181105
181304
  }
181106
- update->columns.push_back(column.Oid());
181305
+ update->columns.push_back(column.StorageOid());
181107
181306
 
181108
181307
  if (expr->type == ExpressionType::VALUE_DEFAULT) {
181109
181308
  update->expressions.push_back(make_unique<BoundDefaultExpression>(column.Type()));
@@ -181185,7 +181384,20 @@ BoundStatement Binder::Bind(VacuumStatement &stmt) {
181185
181384
  auto &get = (LogicalGet &)*ref->get;
181186
181385
  columns.insert(columns.end(), get.names.begin(), get.names.end());
181187
181386
  }
181387
+
181388
+ case_insensitive_set_t column_name_set;
181389
+ vector<string> non_generated_column_names;
181188
181390
  for (auto &col_name : columns) {
181391
+ if (column_name_set.count(col_name) > 0) {
181392
+ throw BinderException("Vacuum the same column twice(same name in column name list)");
181393
+ }
181394
+ column_name_set.insert(col_name);
181395
+ auto &col = ref->table->GetColumn(col_name);
181396
+ // ignore generated column
181397
+ if (col.Generated()) {
181398
+ continue;
181399
+ }
181400
+ non_generated_column_names.push_back(col_name);
181189
181401
  ColumnRefExpression colref(col_name, ref->table->name);
181190
181402
  auto result = bind_context.BindColumn(colref, 0);
181191
181403
  if (result.HasError()) {
@@ -181193,17 +181405,29 @@ BoundStatement Binder::Bind(VacuumStatement &stmt) {
181193
181405
  }
181194
181406
  select_list.push_back(move(result.expression));
181195
181407
  }
181196
- auto table_scan = CreatePlan(*ref);
181197
- D_ASSERT(table_scan->type == LogicalOperatorType::LOGICAL_GET);
181198
- auto &get = (LogicalGet &)*table_scan;
181199
- for (idx_t i = 0; i < get.column_ids.size(); i++) {
181200
- stmt.info->column_id_map[i] = get.column_ids[i];
181201
- }
181408
+ stmt.info->columns = move(non_generated_column_names);
181409
+ if (!select_list.empty()) {
181410
+ auto table_scan = CreatePlan(*ref);
181411
+ D_ASSERT(table_scan->type == LogicalOperatorType::LOGICAL_GET);
181202
181412
 
181203
- auto projection = make_unique<LogicalProjection>(GenerateTableIndex(), move(select_list));
181204
- projection->children.push_back(move(table_scan));
181413
+ auto &get = (LogicalGet &)*table_scan;
181414
+
181415
+ D_ASSERT(select_list.size() == get.column_ids.size());
181416
+ D_ASSERT(stmt.info->columns.size() == get.column_ids.size());
181417
+ for (idx_t i = 0; i < get.column_ids.size(); i++) {
181418
+ stmt.info->column_id_map[i] = ref->table->columns[get.column_ids[i]].StorageOid();
181419
+ }
181420
+
181421
+ auto projection = make_unique<LogicalProjection>(GenerateTableIndex(), move(select_list));
181422
+ projection->children.push_back(move(table_scan));
181205
181423
 
181206
- root = move(projection);
181424
+ root = move(projection);
181425
+ } else {
181426
+ // eg. CREATE TABLE test (x AS (1));
181427
+ // ANALYZE test;
181428
+ // Make it not a SINK so it doesn't have to do anything
181429
+ stmt.info->has_table = false;
181430
+ }
181207
181431
  }
181208
181432
  auto vacuum = make_unique<LogicalSimple>(LogicalOperatorType::LOGICAL_VACUUM, move(stmt.info));
181209
181433
  if (root) {
@@ -186039,6 +186263,7 @@ unique_ptr<TableFilter> ConjunctionAndFilter::Deserialize(FieldReader &source) {
186039
186263
 
186040
186264
 
186041
186265
 
186266
+
186042
186267
  namespace duckdb {
186043
186268
 
186044
186269
  ConstantFilter::ConstantFilter(ExpressionType comparison_type_p, Value constant_p)
@@ -186062,7 +186287,7 @@ FilterPropagateResult ConstantFilter::CheckStatistics(BaseStatistics &stats) {
186062
186287
  case PhysicalType::DOUBLE:
186063
186288
  return ((NumericStatistics &)stats).CheckZonemap(comparison_type, constant);
186064
186289
  case PhysicalType::VARCHAR:
186065
- return ((StringStatistics &)stats).CheckZonemap(comparison_type, constant.ToString());
186290
+ return ((StringStatistics &)stats).CheckZonemap(comparison_type, StringValue::Get(constant));
186066
186291
  default:
186067
186292
  return FilterPropagateResult::NO_PRUNING_POSSIBLE;
186068
186293
  }
@@ -322188,6 +322413,8 @@ exit:
322188
322413
  // See the end of this file for a list
322189
322414
 
322190
322415
 
322416
+ // otherwise we have different definitions for mbedtls_pk_context / mbedtls_sha256_context
322417
+ #define MBEDTLS_ALLOW_PRIVATE_ACCESS
322191
322418
 
322192
322419
 
322193
322420