duckdb 0.5.1-dev7.0 → 0.5.1-dev90.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.1-dev7.0",
4
+ "version": "0.5.1-dev90.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -620,7 +620,88 @@ public:
620
620
 
621
621
  } // namespace duckdb
622
622
 
623
+ //===----------------------------------------------------------------------===//
624
+ // DuckDB
625
+ //
626
+ // extension_functions.hpp
627
+ //
628
+ //
629
+ //===----------------------------------------------------------------------===//
630
+
631
+
623
632
 
633
+
634
+
635
+ namespace duckdb {
636
+
637
+ struct ExtensionFunction {
638
+ char function[48];
639
+ char extension[48];
640
+ };
641
+
642
+ static constexpr ExtensionFunction EXTENSION_FUNCTIONS[] = {
643
+ {"->>", "json"},
644
+ {"array_to_json", "json"},
645
+ {"create_fts_index", "fts"},
646
+ {"dbgen", "tpch"},
647
+ {"drop_fts_index", "fts"},
648
+ {"dsdgen", "tpcds"},
649
+ {"excel_text", "excel"},
650
+ {"from_json", "json"},
651
+ {"from_json_strict", "json"},
652
+ {"from_substrait", "substrait"},
653
+ {"get_substrait", "substrait"},
654
+ {"get_substrait_json", "substrait"},
655
+ {"icu_calendar_names", "icu"},
656
+ {"icu_sort_key", "icu"},
657
+ {"json", "json"},
658
+ {"json_array", "json"},
659
+ {"json_array_length", "json"},
660
+ {"json_extract", "json"},
661
+ {"json_extract_path", "json"},
662
+ {"json_extract_path_text", "json"},
663
+ {"json_extract_string", "json"},
664
+ {"json_group_array", "json"},
665
+ {"json_group_object", "json"},
666
+ {"json_group_structure", "json"},
667
+ {"json_merge_patch", "json"},
668
+ {"json_object", "json"},
669
+ {"json_quote", "json"},
670
+ {"json_structure", "json"},
671
+ {"json_transform", "json"},
672
+ {"json_transform_strict", "json"},
673
+ {"json_type", "json"},
674
+ {"json_valid", "json"},
675
+ {"make_timestamptz", "icu"},
676
+ {"parquet_metadata", "parquet"},
677
+ {"parquet_scan", "parquet"},
678
+ {"parquet_schema", "parquet"},
679
+ {"pg_timezone_names", "icu"},
680
+ {"postgres_attach", "postgres_scanner"},
681
+ {"postgres_scan", "postgres_scanner"},
682
+ {"postgres_scan_pushdown", "postgres_scanner"},
683
+ {"read_json_objects", "json"},
684
+ {"read_ndjson_objects", "json"},
685
+ {"read_parquet", "parquet"},
686
+ {"row_to_json", "json"},
687
+ {"sqlite_attach", "sqlite_scanner"},
688
+ {"sqlite_scan", "sqlite_scanner"},
689
+ {"stem", "fts"},
690
+ {"text", "excel"},
691
+ {"to_json", "json"},
692
+ {"tpcds", "tpcds"},
693
+ {"tpcds_answers", "tpcds"},
694
+ {"tpcds_queries", "tpcds"},
695
+ {"tpch", "tpch"},
696
+ {"tpch_answers", "tpch"},
697
+ {"tpch_queries", "tpch"},
698
+ {"visualize_diff_profiling_output", "visualizer"},
699
+ {"visualize_json_profiling_output", "visualizer"},
700
+ {"visualize_last_profiling_output", "visualizer"},
701
+ };
702
+ } // namespace duckdb
703
+
704
+ #include <algorithm>
624
705
  namespace duckdb {
625
706
 
626
707
  string SimilarCatalogEntry::GetQualifiedName() const {
@@ -823,6 +904,16 @@ SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const
823
904
  return {most_similar.first, most_similar.second, schema_of_most_similar};
824
905
  }
825
906
 
907
+ string FindExtension(const string &function_name) {
908
+ auto size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionFunction);
909
+ auto it = std::lower_bound(
910
+ EXTENSION_FUNCTIONS, EXTENSION_FUNCTIONS + size, function_name,
911
+ [](const ExtensionFunction &element, const string &value) { return element.function < value; });
912
+ if (it != EXTENSION_FUNCTIONS + size && it->function == function_name) {
913
+ return it->extension;
914
+ }
915
+ return "";
916
+ }
826
917
  CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name,
827
918
  CatalogType type, const vector<SchemaCatalogEntry *> &schemas,
828
919
  QueryErrorContext error_context) {
@@ -836,7 +927,12 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
836
927
  }
837
928
  });
838
929
  auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
839
-
930
+ auto extension_name = FindExtension(entry_name);
931
+ if (!extension_name.empty()) {
932
+ return CatalogException("Function with name %s is not on the catalog, but it exists in the %s extension. To "
933
+ "Install and Load the extension, run: INSTALL %s; LOAD %s;",
934
+ entry_name, extension_name, extension_name, extension_name);
935
+ }
840
936
  string did_you_mean;
841
937
  if (unseen_entry.Found() && unseen_entry.distance < entry.distance) {
842
938
  did_you_mean = "\nDid you mean \"" + unseen_entry.GetQualifiedName() + "\"?";
@@ -33336,6 +33432,9 @@ void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr
33336
33432
 
33337
33433
  static inline void VerifyUnswizzledString(const RowLayout &layout, const idx_t &col_idx, const data_ptr_t &row_ptr) {
33338
33434
  #ifdef DEBUG
33435
+ if (layout.GetTypes()[col_idx] == LogicalTypeId::BLOB) {
33436
+ return;
33437
+ }
33339
33438
  idx_t entry_idx;
33340
33439
  idx_t idx_in_entry;
33341
33440
  ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
@@ -35673,7 +35772,10 @@ struct SortConstants {
35673
35772
 
35674
35773
  struct SortLayout {
35675
35774
  public:
35775
+ SortLayout() {
35776
+ }
35676
35777
  explicit SortLayout(const vector<BoundOrderByNode> &orders);
35778
+ SortLayout GetPrefixComparisonLayout(idx_t num_prefix_cols) const;
35677
35779
 
35678
35780
  public:
35679
35781
  idx_t column_count;
@@ -37324,6 +37426,32 @@ SortLayout::SortLayout(const vector<BoundOrderByNode> &orders)
37324
37426
  blob_layout.Initialize(blob_layout_types);
37325
37427
  }
37326
37428
 
37429
+ SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const {
37430
+ SortLayout result;
37431
+ result.column_count = num_prefix_cols;
37432
+ result.all_constant = true;
37433
+ result.comparison_size = 0;
37434
+ for (idx_t col_idx = 0; col_idx < num_prefix_cols; col_idx++) {
37435
+ result.order_types.push_back(order_types[col_idx]);
37436
+ result.order_by_null_types.push_back(order_by_null_types[col_idx]);
37437
+ result.logical_types.push_back(logical_types[col_idx]);
37438
+
37439
+ result.all_constant = result.all_constant && constant_size[col_idx];
37440
+ result.constant_size.push_back(constant_size[col_idx]);
37441
+
37442
+ result.comparison_size += column_sizes[col_idx];
37443
+ result.column_sizes.push_back(column_sizes[col_idx]);
37444
+
37445
+ result.prefix_lengths.push_back(prefix_lengths[col_idx]);
37446
+ result.stats.push_back(stats[col_idx]);
37447
+ result.has_null.push_back(has_null[col_idx]);
37448
+ }
37449
+ result.entry_size = entry_size;
37450
+ result.blob_layout = blob_layout;
37451
+ result.sorting_to_blob_col = sorting_to_blob_col;
37452
+ return result;
37453
+ }
37454
+
37327
37455
  LocalSortState::LocalSortState() : initialized(false) {
37328
37456
  }
37329
37457
 
@@ -47573,11 +47701,36 @@ Value Value::CreateValue(dtime_t value) {
47573
47701
  return Value::TIME(value);
47574
47702
  }
47575
47703
 
47704
+ template <>
47705
+ Value Value::CreateValue(dtime_tz_t value) {
47706
+ return Value::TIMETZ(value);
47707
+ }
47708
+
47576
47709
  template <>
47577
47710
  Value Value::CreateValue(timestamp_t value) {
47578
47711
  return Value::TIMESTAMP(value);
47579
47712
  }
47580
47713
 
47714
+ template <>
47715
+ Value Value::CreateValue(timestamp_sec_t value) {
47716
+ return Value::TIMESTAMPSEC(value);
47717
+ }
47718
+
47719
+ template <>
47720
+ Value Value::CreateValue(timestamp_ms_t value) {
47721
+ return Value::TIMESTAMPMS(value);
47722
+ }
47723
+
47724
+ template <>
47725
+ Value Value::CreateValue(timestamp_ns_t value) {
47726
+ return Value::TIMESTAMPNS(value);
47727
+ }
47728
+
47729
+ template <>
47730
+ Value Value::CreateValue(timestamp_tz_t value) {
47731
+ return Value::TIMESTAMPTZ(value);
47732
+ }
47733
+
47581
47734
  template <>
47582
47735
  Value Value::CreateValue(const char *value) {
47583
47736
  return Value(string(value));
@@ -49150,19 +49303,6 @@ void Vector::Resize(idx_t cur_size, idx_t new_size) {
49150
49303
  }
49151
49304
  }
49152
49305
 
49153
- // FIXME Just like DECIMAL, it's important that type_info gets considered when determining whether or not to cast
49154
- // just comparing internal type is not always enough
49155
- static bool ValueShouldBeCast(const LogicalType &incoming, const LogicalType &target) {
49156
- if (incoming.InternalType() != target.InternalType()) {
49157
- return true;
49158
- }
49159
- if (incoming.id() == LogicalTypeId::DECIMAL && incoming.id() == target.id()) {
49160
- //! Compare the type_info
49161
- return incoming != target;
49162
- }
49163
- return false;
49164
- }
49165
-
49166
49306
  void Vector::SetValue(idx_t index, const Value &val) {
49167
49307
  if (GetVectorType() == VectorType::DICTIONARY_VECTOR) {
49168
49308
  // dictionary: apply dictionary and forward to child
@@ -49170,10 +49310,11 @@ void Vector::SetValue(idx_t index, const Value &val) {
49170
49310
  auto &child = DictionaryVector::Child(*this);
49171
49311
  return child.SetValue(sel_vector.get_index(index), val);
49172
49312
  }
49173
- if (ValueShouldBeCast(val.type(), GetType())) {
49313
+ if (val.type() != GetType()) {
49174
49314
  SetValue(index, val.CastAs(GetType()));
49175
49315
  return;
49176
49316
  }
49317
+ D_ASSERT(val.type().InternalType() == GetType().InternalType());
49177
49318
 
49178
49319
  validity.EnsureWritable();
49179
49320
  validity.Set(index, !val.IsNull());
@@ -49424,7 +49565,10 @@ Value Vector::GetValue(const Vector &v_p, idx_t index_p) {
49424
49565
  auto value = GetValueInternal(v_p, index_p);
49425
49566
  // set the alias of the type to the correct value, if there is a type alias
49426
49567
  if (v_p.GetType().HasAlias()) {
49427
- value.type().SetAlias(v_p.GetType().GetAlias());
49568
+ value.type().CopyAuxInfo(v_p.GetType());
49569
+ }
49570
+ if (v_p.GetType().id() != LogicalTypeId::AGGREGATE_STATE && value.type().id() != LogicalTypeId::AGGREGATE_STATE) {
49571
+ D_ASSERT(v_p.GetType() == value.type());
49428
49572
  }
49429
49573
  return value;
49430
49574
  }
@@ -51491,6 +51635,7 @@ public:
51491
51635
  if (!alias.empty()) {
51492
51636
  return false;
51493
51637
  }
51638
+ //! We only need to compare aliases when both types have them in this case
51494
51639
  return true;
51495
51640
  }
51496
51641
  if (alias != other_p->alias) {
@@ -51504,8 +51649,7 @@ public:
51504
51649
  if (type != other_p->type) {
51505
51650
  return false;
51506
51651
  }
51507
- auto &other = (ExtraTypeInfo &)*other_p;
51508
- return alias == other.alias && EqualsInternal(other_p);
51652
+ return alias == other_p->alias && EqualsInternal(other_p);
51509
51653
  }
51510
51654
  //! Serializes a ExtraTypeInfo to a stand-alone binary blob
51511
51655
  virtual void Serialize(FieldWriter &writer) const {};
@@ -52184,10 +52328,7 @@ LogicalType LogicalType::Deserialize(Deserializer &source) {
52184
52328
  return LogicalType(id, move(info));
52185
52329
  }
52186
52330
 
52187
- bool LogicalType::operator==(const LogicalType &rhs) const {
52188
- if (id_ != rhs.id_) {
52189
- return false;
52190
- }
52331
+ bool LogicalType::EqualTypeInfo(const LogicalType &rhs) const {
52191
52332
  if (type_info_.get() == rhs.type_info_.get()) {
52192
52333
  return true;
52193
52334
  }
@@ -52199,6 +52340,13 @@ bool LogicalType::operator==(const LogicalType &rhs) const {
52199
52340
  }
52200
52341
  }
52201
52342
 
52343
+ bool LogicalType::operator==(const LogicalType &rhs) const {
52344
+ if (id_ != rhs.id_) {
52345
+ return false;
52346
+ }
52347
+ return EqualTypeInfo(rhs);
52348
+ }
52349
+
52202
52350
  } // namespace duckdb
52203
52351
 
52204
52352
 
@@ -64927,12 +65075,14 @@ public:
64927
65075
 
64928
65076
  WindowGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
64929
65077
  const Types &payload_types, idx_t max_mem, bool external)
64930
- : memory_per_thread(max_mem), count(0), partition_layout(partitions) {
65078
+ : memory_per_thread(max_mem), count(0) {
64931
65079
 
64932
65080
  RowLayout payload_layout;
64933
65081
  payload_layout.Initialize(payload_types);
64934
65082
  global_sort = make_unique<GlobalSortState>(buffer_manager, orders, payload_layout);
64935
65083
  global_sort->external = external;
65084
+
65085
+ partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
64936
65086
  }
64937
65087
 
64938
65088
  void Combine(LocalSortState &local_sort) {
@@ -93281,21 +93431,21 @@ AggregateFunction GetHistogramFunction(const LogicalType &type) {
93281
93431
  case LogicalType::VARCHAR:
93282
93432
  return GetMapType<HistogramStringFunctor, string, IS_ORDERED>(type);
93283
93433
  case LogicalType::TIMESTAMP:
93284
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93434
+ return GetMapType<HistogramFunctor, timestamp_t, IS_ORDERED>(type);
93285
93435
  case LogicalType::TIMESTAMP_TZ:
93286
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93436
+ return GetMapType<HistogramFunctor, timestamp_tz_t, IS_ORDERED>(type);
93287
93437
  case LogicalType::TIMESTAMP_S:
93288
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93438
+ return GetMapType<HistogramFunctor, timestamp_sec_t, IS_ORDERED>(type);
93289
93439
  case LogicalType::TIMESTAMP_MS:
93290
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93440
+ return GetMapType<HistogramFunctor, timestamp_ms_t, IS_ORDERED>(type);
93291
93441
  case LogicalType::TIMESTAMP_NS:
93292
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93442
+ return GetMapType<HistogramFunctor, timestamp_ns_t, IS_ORDERED>(type);
93293
93443
  case LogicalType::TIME:
93294
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93444
+ return GetMapType<HistogramFunctor, dtime_t, IS_ORDERED>(type);
93295
93445
  case LogicalType::TIME_TZ:
93296
- return GetMapType<HistogramFunctor, int64_t, IS_ORDERED>(type);
93446
+ return GetMapType<HistogramFunctor, dtime_tz_t, IS_ORDERED>(type);
93297
93447
  case LogicalType::DATE:
93298
- return GetMapType<HistogramFunctor, int32_t, IS_ORDERED>(type);
93448
+ return GetMapType<HistogramFunctor, date_t, IS_ORDERED>(type);
93299
93449
  default:
93300
93450
  throw InternalException("Unimplemented histogram aggregate");
93301
93451
  }
@@ -96859,7 +97009,8 @@ struct DateDiff {
96859
97009
  struct WeekOperator {
96860
97010
  template <class TA, class TB, class TR>
96861
97011
  static inline TR Operation(TA startdate, TB enddate) {
96862
- return Date::Epoch(enddate) / Interval::SECS_PER_WEEK - Date::Epoch(startdate) / Interval::SECS_PER_WEEK;
97012
+ return Date::Epoch(Date::GetMondayOfCurrentWeek(enddate)) / Interval::SECS_PER_WEEK -
97013
+ Date::Epoch(Date::GetMondayOfCurrentWeek(startdate)) / Interval::SECS_PER_WEEK;
96863
97014
  }
96864
97015
  };
96865
97016
 
@@ -103243,12 +103394,49 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
103243
103394
  result, state_vector.state_vector, count);
103244
103395
  break;
103245
103396
  case PhysicalType::INT32:
103246
- FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
103247
- result, state_vector.state_vector, count);
103397
+ if (key_type.id() == LogicalTypeId::DATE) {
103398
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, date_t>(
103399
+ result, state_vector.state_vector, count);
103400
+ } else {
103401
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int32_t>(
103402
+ result, state_vector.state_vector, count);
103403
+ }
103248
103404
  break;
103249
103405
  case PhysicalType::INT64:
103250
- FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
103251
- result, state_vector.state_vector, count);
103406
+ switch (key_type.id()) {
103407
+ case LogicalTypeId::TIME:
103408
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_t>(
103409
+ result, state_vector.state_vector, count);
103410
+ break;
103411
+ case LogicalTypeId::TIME_TZ:
103412
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, dtime_tz_t>(
103413
+ result, state_vector.state_vector, count);
103414
+ break;
103415
+ case LogicalTypeId::TIMESTAMP:
103416
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_t>(
103417
+ result, state_vector.state_vector, count);
103418
+ break;
103419
+ case LogicalTypeId::TIMESTAMP_MS:
103420
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ms_t>(
103421
+ result, state_vector.state_vector, count);
103422
+ break;
103423
+ case LogicalTypeId::TIMESTAMP_NS:
103424
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_ns_t>(
103425
+ result, state_vector.state_vector, count);
103426
+ break;
103427
+ case LogicalTypeId::TIMESTAMP_SEC:
103428
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_sec_t>(
103429
+ result, state_vector.state_vector, count);
103430
+ break;
103431
+ case LogicalTypeId::TIMESTAMP_TZ:
103432
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, timestamp_tz_t>(
103433
+ result, state_vector.state_vector, count);
103434
+ break;
103435
+ default:
103436
+ FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, int64_t>(
103437
+ result, state_vector.state_vector, count);
103438
+ break;
103439
+ }
103252
103440
  break;
103253
103441
  case PhysicalType::FLOAT:
103254
103442
  FUNCTION_FUNCTOR::template ListExecuteFunction<FinalizeValueFunctor, float>(
@@ -104318,18 +104506,12 @@ void SinkDataChunk(Vector *child_vector, SelectionVector &sel, idx_t offset_list
104318
104506
  static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &result) {
104319
104507
  D_ASSERT(args.ColumnCount() >= 1 && args.ColumnCount() <= 3);
104320
104508
  auto count = args.size();
104321
- Vector &lists = args.data[0];
104509
+ Vector &input_lists = args.data[0];
104322
104510
 
104323
104511
  result.SetVectorType(VectorType::FLAT_VECTOR);
104324
104512
  auto &result_validity = FlatVector::Validity(result);
104325
104513
 
104326
- for (auto &v : args.data) {
104327
- if (v.GetVectorType() != VectorType::FLAT_VECTOR && v.GetVectorType() != VectorType::CONSTANT_VECTOR) {
104328
- v.Flatten(count);
104329
- }
104330
- }
104331
-
104332
- if (lists.GetType().id() == LogicalTypeId::SQLNULL) {
104514
+ if (input_lists.GetType().id() == LogicalTypeId::SQLNULL) {
104333
104515
  result_validity.SetInvalid(0);
104334
104516
  return;
104335
104517
  }
@@ -104344,15 +104526,18 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104344
104526
  LocalSortState local_sort_state;
104345
104527
  local_sort_state.Initialize(global_sort_state, buffer_manager);
104346
104528
 
104529
+ // this ensures that we do not change the order of the entries in the input chunk
104530
+ VectorOperations::Copy(input_lists, result, count, 0, 0);
104531
+
104347
104532
  // get the child vector
104348
- auto lists_size = ListVector::GetListSize(lists);
104349
- auto &child_vector = ListVector::GetEntry(lists);
104533
+ auto lists_size = ListVector::GetListSize(result);
104534
+ auto &child_vector = ListVector::GetEntry(result);
104350
104535
  UnifiedVectorFormat child_data;
104351
104536
  child_vector.ToUnifiedFormat(lists_size, child_data);
104352
104537
 
104353
104538
  // get the lists data
104354
104539
  UnifiedVectorFormat lists_data;
104355
- lists.ToUnifiedFormat(count, lists_data);
104540
+ result.ToUnifiedFormat(count, lists_data);
104356
104541
  auto list_entries = (list_entry_t *)lists_data.data;
104357
104542
 
104358
104543
  // create the lists_indices vector, this contains an element for each list's entry,
@@ -104449,8 +104634,6 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104449
104634
  child_vector.Flatten(sel_sorted_idx);
104450
104635
  }
104451
104636
 
104452
- result.Reference(lists);
104453
-
104454
104637
  if (args.AllConstant()) {
104455
104638
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
104456
104639
  }
@@ -141402,7 +141585,7 @@ Value ForceCompressionSetting::GetSetting(ClientContext &context) {
141402
141585
  //===--------------------------------------------------------------------===//
141403
141586
  void HomeDirectorySetting::SetLocal(ClientContext &context, const Value &input) {
141404
141587
  auto &config = ClientConfig::GetConfig(context);
141405
- config.home_directory = input.IsNull() ? input.ToString() : string();
141588
+ config.home_directory = input.IsNull() ? string() : input.ToString();
141406
141589
  }
141407
141590
 
141408
141591
  Value HomeDirectorySetting::GetSetting(ClientContext &context) {
@@ -151925,6 +152108,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalGet
151925
152108
 
151926
152109
 
151927
152110
 
152111
+
151928
152112
  namespace duckdb {
151929
152113
 
151930
152114
  void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, unique_ptr<LogicalOperator> *node_ptr) {
@@ -151954,10 +152138,15 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
151954
152138
  // semi or inner join on false; entire node can be pruned
151955
152139
  ReplaceWithEmptyResult(*node_ptr);
151956
152140
  return;
151957
- case JoinType::ANTI:
151958
- // anti join: replace entire join with LHS
151959
- *node_ptr = move(join.children[0]);
152141
+ case JoinType::ANTI: {
152142
+ // when the right child has data, return the left child
152143
+ // when the right child has no data, return an empty set
152144
+ auto limit = make_unique<LogicalLimit>(1, 0, nullptr, nullptr);
152145
+ limit->AddChild(move(join.children[1]));
152146
+ auto cross_product = LogicalCrossProduct::Create(move(join.children[0]), move(limit));
152147
+ *node_ptr = move(cross_product);
151960
152148
  return;
152149
+ }
151961
152150
  case JoinType::LEFT:
151962
152151
  // anti/left outer join: replace right side with empty node
151963
152152
  ReplaceWithEmptyResult(join.children[1]);
@@ -151985,10 +152174,15 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
151985
152174
  } else {
151986
152175
  // this is the only condition and it is always true: all conditions are true
151987
152176
  switch (join.join_type) {
151988
- case JoinType::SEMI:
151989
- // semi join on true: replace entire join with LHS
151990
- *node_ptr = move(join.children[0]);
152177
+ case JoinType::SEMI: {
152178
+ // when the right child has data, return the left child
152179
+ // when the right child has no data, return an empty set
152180
+ auto limit = make_unique<LogicalLimit>(1, 0, nullptr, nullptr);
152181
+ limit->AddChild(move(join.children[1]));
152182
+ auto cross_product = LogicalCrossProduct::Create(move(join.children[0]), move(limit));
152183
+ *node_ptr = move(cross_product);
151991
152184
  return;
152185
+ }
151992
152186
  case JoinType::INNER:
151993
152187
  case JoinType::LEFT:
151994
152188
  case JoinType::RIGHT:
@@ -152105,6 +152299,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalJoin
152105
152299
  // then propagate into the join conditions
152106
152300
  switch (join.type) {
152107
152301
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
152302
+ case LogicalOperatorType::LOGICAL_DELIM_JOIN:
152108
152303
  PropagateStatistics((LogicalComparisonJoin &)join, node_ptr);
152109
152304
  break;
152110
152305
  case LogicalOperatorType::LOGICAL_ANY_JOIN:
@@ -171765,7 +171960,7 @@ LogicalType Transformer::TransformTypeName(duckdb_libpgquery::PGTypeName *type_n
171765
171960
 
171766
171961
  result_type = LogicalType::MAP(move(children));
171767
171962
  } else {
171768
- int8_t width, scale;
171963
+ int64_t width, scale;
171769
171964
  if (base_type == LogicalTypeId::DECIMAL) {
171770
171965
  // default decimal width/scale
171771
171966
  width = 18;
@@ -180863,6 +181058,9 @@ BoundStatement Binder::BindSummarize(ShowStatement &stmt) {
180863
181058
 
180864
181059
 
180865
181060
 
181061
+
181062
+
181063
+
180866
181064
  //===----------------------------------------------------------------------===//
180867
181065
  // DuckDB
180868
181066
  //
@@ -180897,10 +181095,6 @@ protected:
180897
181095
 
180898
181096
 
180899
181097
 
180900
-
180901
-
180902
-
180903
-
180904
181098
  //===----------------------------------------------------------------------===//
180905
181099
  // DuckDB
180906
181100
  //
@@ -180933,6 +181127,8 @@ public:
180933
181127
  };
180934
181128
  } // namespace duckdb
180935
181129
 
181130
+
181131
+
180936
181132
  #include <algorithm>
180937
181133
 
180938
181134
  namespace duckdb {
@@ -181103,10 +181299,10 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) {
181103
181299
  if (column.Generated()) {
181104
181300
  throw BinderException("Cant update column \"%s\" because it is a generated column!", column.Name());
181105
181301
  }
181106
- if (std::find(update->columns.begin(), update->columns.end(), column.Oid()) != update->columns.end()) {
181302
+ if (std::find(update->columns.begin(), update->columns.end(), column.StorageOid()) != update->columns.end()) {
181107
181303
  throw BinderException("Multiple assignments to same column \"%s\"", colname);
181108
181304
  }
181109
- update->columns.push_back(column.Oid());
181305
+ update->columns.push_back(column.StorageOid());
181110
181306
 
181111
181307
  if (expr->type == ExpressionType::VALUE_DEFAULT) {
181112
181308
  update->expressions.push_back(make_unique<BoundDefaultExpression>(column.Type()));
@@ -181188,7 +181384,20 @@ BoundStatement Binder::Bind(VacuumStatement &stmt) {
181188
181384
  auto &get = (LogicalGet &)*ref->get;
181189
181385
  columns.insert(columns.end(), get.names.begin(), get.names.end());
181190
181386
  }
181387
+
181388
+ case_insensitive_set_t column_name_set;
181389
+ vector<string> non_generated_column_names;
181191
181390
  for (auto &col_name : columns) {
181391
+ if (column_name_set.count(col_name) > 0) {
181392
+ throw BinderException("Vacuum the same column twice(same name in column name list)");
181393
+ }
181394
+ column_name_set.insert(col_name);
181395
+ auto &col = ref->table->GetColumn(col_name);
181396
+ // ignore generated column
181397
+ if (col.Generated()) {
181398
+ continue;
181399
+ }
181400
+ non_generated_column_names.push_back(col_name);
181192
181401
  ColumnRefExpression colref(col_name, ref->table->name);
181193
181402
  auto result = bind_context.BindColumn(colref, 0);
181194
181403
  if (result.HasError()) {
@@ -181196,17 +181405,29 @@ BoundStatement Binder::Bind(VacuumStatement &stmt) {
181196
181405
  }
181197
181406
  select_list.push_back(move(result.expression));
181198
181407
  }
181199
- auto table_scan = CreatePlan(*ref);
181200
- D_ASSERT(table_scan->type == LogicalOperatorType::LOGICAL_GET);
181201
- auto &get = (LogicalGet &)*table_scan;
181202
- for (idx_t i = 0; i < get.column_ids.size(); i++) {
181203
- stmt.info->column_id_map[i] = get.column_ids[i];
181204
- }
181408
+ stmt.info->columns = move(non_generated_column_names);
181409
+ if (!select_list.empty()) {
181410
+ auto table_scan = CreatePlan(*ref);
181411
+ D_ASSERT(table_scan->type == LogicalOperatorType::LOGICAL_GET);
181205
181412
 
181206
- auto projection = make_unique<LogicalProjection>(GenerateTableIndex(), move(select_list));
181207
- projection->children.push_back(move(table_scan));
181413
+ auto &get = (LogicalGet &)*table_scan;
181414
+
181415
+ D_ASSERT(select_list.size() == get.column_ids.size());
181416
+ D_ASSERT(stmt.info->columns.size() == get.column_ids.size());
181417
+ for (idx_t i = 0; i < get.column_ids.size(); i++) {
181418
+ stmt.info->column_id_map[i] = ref->table->columns[get.column_ids[i]].StorageOid();
181419
+ }
181208
181420
 
181209
- root = move(projection);
181421
+ auto projection = make_unique<LogicalProjection>(GenerateTableIndex(), move(select_list));
181422
+ projection->children.push_back(move(table_scan));
181423
+
181424
+ root = move(projection);
181425
+ } else {
181426
+ // eg. CREATE TABLE test (x AS (1));
181427
+ // ANALYZE test;
181428
+ // Make it not a SINK so it doesn't have to do anything
181429
+ stmt.info->has_table = false;
181430
+ }
181210
181431
  }
181211
181432
  auto vacuum = make_unique<LogicalSimple>(LogicalOperatorType::LOGICAL_VACUUM, move(stmt.info));
181212
181433
  if (root) {