duckdb 0.7.2-dev457.0 → 0.7.2-dev614.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/binding.gyp +9 -9
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-table-range.cpp +7 -7
  4. package/src/duckdb/extension/parquet/parquet-extension.cpp +1 -0
  5. package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -1
  6. package/src/duckdb/src/catalog/catalog.cpp +13 -0
  7. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -21
  8. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +8 -2
  9. package/src/duckdb/src/catalog/catalog_set.cpp +1 -0
  10. package/src/duckdb/src/common/arrow/arrow_appender.cpp +48 -4
  11. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  12. package/src/duckdb/src/common/field_writer.cpp +1 -0
  13. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +4 -0
  14. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +15 -2
  15. package/src/duckdb/src/common/types/blob.cpp +1 -1
  16. package/src/duckdb/src/common/types/chunk_collection.cpp +2 -2
  17. package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
  18. package/src/duckdb/src/common/types/value.cpp +8 -8
  19. package/src/duckdb/src/common/types.cpp +147 -64
  20. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +12 -3
  21. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +2 -2
  22. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -11
  23. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  24. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +2 -2
  25. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +20 -40
  26. package/src/duckdb/src/function/aggregate/holistic/quantile.cpp +1 -1
  27. package/src/duckdb/src/function/aggregate/nested/list.cpp +8 -8
  28. package/src/duckdb/src/function/cast/struct_cast.cpp +1 -1
  29. package/src/duckdb/src/function/scalar/date/date_part.cpp +1 -1
  30. package/src/duckdb/src/function/scalar/list/list_concat.cpp +5 -4
  31. package/src/duckdb/src/function/scalar/list/list_lambdas.cpp +3 -3
  32. package/src/duckdb/src/function/scalar/list/list_value.cpp +1 -1
  33. package/src/duckdb/src/function/scalar/map/map_entries.cpp +1 -1
  34. package/src/duckdb/src/function/scalar/struct/struct_insert.cpp +1 -1
  35. package/src/duckdb/src/function/scalar/struct/struct_pack.cpp +1 -1
  36. package/src/duckdb/src/function/table/arrow.cpp +5 -2
  37. package/src/duckdb/src/function/table/arrow_conversion.cpp +18 -0
  38. package/src/duckdb/src/function/table/system/test_all_types.cpp +2 -2
  39. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  40. package/src/duckdb/src/function/table_function.cpp +11 -11
  41. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +3 -0
  42. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
  43. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +12 -4
  44. package/src/duckdb/src/include/duckdb/common/{http_stats.hpp → http_state.hpp} +18 -4
  45. package/src/duckdb/src/include/duckdb/common/serializer/buffered_deserializer.hpp +4 -2
  46. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +8 -2
  47. package/src/duckdb/src/include/duckdb/common/serializer.hpp +13 -0
  48. package/src/duckdb/src/include/duckdb/common/types/value.hpp +3 -3
  49. package/src/duckdb/src/include/duckdb/common/types.hpp +30 -4
  50. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +1 -0
  51. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +12 -1
  52. package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
  53. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -3
  54. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -0
  55. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  56. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +3 -0
  57. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +6 -0
  58. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +1 -1
  59. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +7 -0
  60. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +0 -1
  61. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +1 -1
  62. package/src/duckdb/src/main/client_context.cpp +30 -32
  63. package/src/duckdb/src/main/client_data.cpp +7 -6
  64. package/src/duckdb/src/main/config.cpp +4 -0
  65. package/src/duckdb/src/main/database.cpp +9 -0
  66. package/src/duckdb/src/main/query_profiler.cpp +17 -15
  67. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +3 -4
  68. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +5 -3
  69. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +9 -2
  70. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +2 -2
  71. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +2 -0
  72. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +1 -1
  73. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +3 -2
  74. package/src/duckdb/src/parser/transform/statement/transform_rename.cpp +3 -4
  75. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +1 -1
  76. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +4 -3
  77. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -1
  78. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +2 -1
  79. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +1 -0
  80. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +11 -1
  81. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -0
  82. package/src/duckdb/src/planner/expression_binder.cpp +1 -1
  83. package/src/duckdb/src/planner/logical_operator.cpp +4 -2
  84. package/src/duckdb/src/planner/planner.cpp +2 -1
  85. package/src/duckdb/src/storage/checkpoint_manager.cpp +8 -3
  86. package/src/duckdb/src/storage/meta_block_reader.cpp +22 -0
  87. package/src/duckdb/src/storage/statistics/list_stats.cpp +6 -2
  88. package/src/duckdb/src/storage/statistics/struct_stats.cpp +3 -1
  89. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  90. package/src/duckdb/src/storage/table/update_segment.cpp +11 -7
  91. package/src/duckdb/src/storage/wal_replay.cpp +8 -5
  92. package/src/duckdb/src/storage/write_ahead_log.cpp +2 -2
  93. package/src/duckdb/src/transaction/commit_state.cpp +11 -7
  94. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1152 -1152
  95. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
@@ -1,7 +1,9 @@
1
1
  #include "duckdb/common/types.hpp"
2
2
 
3
3
  #include "duckdb/catalog/catalog.hpp"
4
+ #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
4
5
  #include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
6
+ #include "duckdb/catalog/catalog_search_path.hpp"
5
7
  #include "duckdb/catalog/default/default_types.hpp"
6
8
  #include "duckdb/common/exception.hpp"
7
9
  #include "duckdb/common/field_writer.hpp"
@@ -17,6 +19,11 @@
17
19
  #include "duckdb/common/types/vector.hpp"
18
20
  #include "duckdb/common/unordered_map.hpp"
19
21
  #include "duckdb/function/cast_rules.hpp"
22
+ #include "duckdb/main/attached_database.hpp"
23
+ #include "duckdb/main/client_context.hpp"
24
+ #include "duckdb/main/client_data.hpp"
25
+ #include "duckdb/main/database.hpp"
26
+ #include "duckdb/main/database_manager.hpp"
20
27
  #include "duckdb/parser/keyword_helper.hpp"
21
28
  #include "duckdb/parser/parser.hpp"
22
29
 
@@ -515,7 +522,7 @@ LogicalType GetUserTypeRecursive(const LogicalType &type, ClientContext &context
515
522
  for (auto &child : StructType::GetChildTypes(type)) {
516
523
  children.emplace_back(child.first, GetUserTypeRecursive(child.second, context));
517
524
  }
518
- return LogicalType::STRUCT(std::move(children));
525
+ return LogicalType::STRUCT(children);
519
526
  }
520
527
  if (type.id() == LogicalTypeId::LIST) {
521
528
  return LogicalType::LIST(GetUserTypeRecursive(ListType::GetChildType(type), context));
@@ -766,12 +773,12 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
766
773
  if (type_id == LogicalTypeId::LIST) {
767
774
  // list: perform max recursively on child type
768
775
  auto new_child = MaxLogicalType(ListType::GetChildType(left), ListType::GetChildType(right));
769
- return LogicalType::LIST(std::move(new_child));
776
+ return LogicalType::LIST(new_child);
770
777
  }
771
778
  if (type_id == LogicalTypeId::MAP) {
772
779
  // list: perform max recursively on child type
773
780
  auto new_child = MaxLogicalType(ListType::GetChildType(left), ListType::GetChildType(right));
774
- return LogicalType::MAP(std::move(new_child));
781
+ return LogicalType::MAP(new_child);
775
782
  }
776
783
  if (type_id == LogicalTypeId::STRUCT) {
777
784
  // struct: perform recursively
@@ -785,10 +792,10 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
785
792
  child_list_t<LogicalType> child_types;
786
793
  for (idx_t i = 0; i < left_child_types.size(); i++) {
787
794
  auto child_type = MaxLogicalType(left_child_types[i].second, right_child_types[i].second);
788
- child_types.push_back(make_pair(left_child_types[i].first, std::move(child_type)));
795
+ child_types.emplace_back(left_child_types[i].first, std::move(child_type));
789
796
  }
790
797
 
791
- return LogicalType::STRUCT(std::move(child_types));
798
+ return LogicalType::STRUCT(child_types);
792
799
  }
793
800
  if (type_id == LogicalTypeId::UNION) {
794
801
  auto left_member_count = UnionType::GetMemberCount(left);
@@ -797,7 +804,7 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
797
804
  // return the "larger" type, with the most members
798
805
  return left_member_count > right_member_count ? left : right;
799
806
  }
800
- // otherwise, keep left, dont try to meld the two together.
807
+ // otherwise, keep left, don't try to meld the two together.
801
808
  return left;
802
809
  }
803
810
  // types are equal but no extra specifier: just return the type
@@ -838,17 +845,6 @@ bool ApproxEqual(double ldecimal, double rdecimal) {
838
845
  //===--------------------------------------------------------------------===//
839
846
  // Extra Type Info
840
847
  //===--------------------------------------------------------------------===//
841
- enum class ExtraTypeInfoType : uint8_t {
842
- INVALID_TYPE_INFO = 0,
843
- GENERIC_TYPE_INFO = 1,
844
- DECIMAL_TYPE_INFO = 2,
845
- STRING_TYPE_INFO = 3,
846
- LIST_TYPE_INFO = 4,
847
- STRUCT_TYPE_INFO = 5,
848
- ENUM_TYPE_INFO = 6,
849
- USER_TYPE_INFO = 7,
850
- AGGREGATE_STATE_TYPE_INFO = 8
851
- };
852
848
 
853
849
  struct ExtraTypeInfo {
854
850
  explicit ExtraTypeInfo(ExtraTypeInfoType type) : type(type) {
@@ -941,6 +937,10 @@ TypeCatalogEntry *LogicalType::GetCatalog(const LogicalType &type) {
941
937
  return ((ExtraTypeInfo &)*info).catalog_entry;
942
938
  }
943
939
 
940
+ ExtraTypeInfoType LogicalType::GetExtraTypeInfoType(const ExtraTypeInfo &type) {
941
+ return type.type;
942
+ }
943
+
944
944
  //===--------------------------------------------------------------------===//
945
945
  // Decimal Type
946
946
  //===--------------------------------------------------------------------===//
@@ -1076,8 +1076,8 @@ const LogicalType &ListType::GetChildType(const LogicalType &type) {
1076
1076
  return ((ListTypeInfo &)*info).child_type;
1077
1077
  }
1078
1078
 
1079
- LogicalType LogicalType::LIST(LogicalType child) {
1080
- auto info = make_shared<ListTypeInfo>(std::move(child));
1079
+ LogicalType LogicalType::LIST(const LogicalType &child) {
1080
+ auto info = make_shared<ListTypeInfo>(child);
1081
1081
  return LogicalType(LogicalTypeId::LIST, std::move(info));
1082
1082
  }
1083
1083
 
@@ -1108,7 +1108,7 @@ public:
1108
1108
  for (uint32_t i = 0; i < child_types_size; i++) {
1109
1109
  auto name = source.Read<string>();
1110
1110
  auto type = LogicalType::Deserialize(source);
1111
- child_list.push_back(make_pair(std::move(name), std::move(type)));
1111
+ child_list.emplace_back(std::move(name), std::move(type));
1112
1112
  }
1113
1113
  return make_shared<StructTypeInfo>(std::move(child_list));
1114
1114
  }
@@ -1207,8 +1207,8 @@ idx_t StructType::GetChildCount(const LogicalType &type) {
1207
1207
  return StructType::GetChildTypes(type).size();
1208
1208
  }
1209
1209
 
1210
- LogicalType LogicalType::STRUCT(child_list_t<LogicalType> children) {
1211
- auto info = make_shared<StructTypeInfo>(std::move(children));
1210
+ LogicalType LogicalType::STRUCT(const child_list_t<LogicalType> &children) {
1211
+ auto info = make_shared<StructTypeInfo>(children);
1212
1212
  return LogicalType(LogicalTypeId::STRUCT, std::move(info));
1213
1213
  }
1214
1214
 
@@ -1220,16 +1220,16 @@ LogicalType LogicalType::AGGREGATE_STATE(aggregate_state_t state_type) { // NOLI
1220
1220
  //===--------------------------------------------------------------------===//
1221
1221
  // Map Type
1222
1222
  //===--------------------------------------------------------------------===//
1223
- LogicalType LogicalType::MAP(LogicalType child) {
1224
- auto info = make_shared<ListTypeInfo>(std::move(child));
1223
+ LogicalType LogicalType::MAP(const LogicalType &child) {
1224
+ auto info = make_shared<ListTypeInfo>(child);
1225
1225
  return LogicalType(LogicalTypeId::MAP, std::move(info));
1226
1226
  }
1227
1227
 
1228
1228
  LogicalType LogicalType::MAP(LogicalType key, LogicalType value) {
1229
1229
  child_list_t<LogicalType> child_types;
1230
- child_types.push_back({"key", std::move(key)});
1231
- child_types.push_back({"value", std::move(value)});
1232
- return LogicalType::MAP(LogicalType::STRUCT(std::move(child_types)));
1230
+ child_types.emplace_back("key", std::move(key));
1231
+ child_types.emplace_back("value", std::move(value));
1232
+ return LogicalType::MAP(LogicalType::STRUCT(child_types));
1233
1233
  }
1234
1234
 
1235
1235
  const LogicalType &MapType::KeyType(const LogicalType &type) {
@@ -1247,7 +1247,7 @@ const LogicalType &MapType::ValueType(const LogicalType &type) {
1247
1247
  //===--------------------------------------------------------------------===//
1248
1248
 
1249
1249
  LogicalType LogicalType::UNION(child_list_t<LogicalType> members) {
1250
- D_ASSERT(members.size() > 0);
1250
+ D_ASSERT(!members.empty());
1251
1251
  D_ASSERT(members.size() <= UnionType::MAX_UNION_MEMBERS);
1252
1252
  // union types always have a hidden "tag" field in front
1253
1253
  members.insert(members.begin(), {"", LogicalType::TINYINT});
@@ -1270,7 +1270,7 @@ const string &UnionType::GetMemberName(const LogicalType &type, idx_t index) {
1270
1270
  }
1271
1271
 
1272
1272
  idx_t UnionType::GetMemberCount(const LogicalType &type) {
1273
- // dont count the "tag" field
1273
+ // don't count the "tag" field
1274
1274
  return StructType::GetChildTypes(type).size() - 1;
1275
1275
  }
1276
1276
  const child_list_t<LogicalType> UnionType::CopyMemberTypes(const LogicalType &type) {
@@ -1326,13 +1326,27 @@ enum EnumDictType : uint8_t { INVALID = 0, VECTOR_DICT = 1 };
1326
1326
 
1327
1327
  struct EnumTypeInfo : public ExtraTypeInfo {
1328
1328
  explicit EnumTypeInfo(string enum_name_p, Vector &values_insert_order_p, idx_t dict_size_p)
1329
- : ExtraTypeInfo(ExtraTypeInfoType::ENUM_TYPE_INFO), dict_type(EnumDictType::VECTOR_DICT),
1330
- enum_name(std::move(enum_name_p)), values_insert_order(values_insert_order_p), dict_size(dict_size_p) {
1331
- }
1332
- EnumDictType dict_type;
1333
- string enum_name;
1334
- Vector values_insert_order;
1335
- idx_t dict_size;
1329
+ : ExtraTypeInfo(ExtraTypeInfoType::ENUM_TYPE_INFO), values_insert_order(values_insert_order_p),
1330
+ dict_type(EnumDictType::VECTOR_DICT), enum_name(std::move(enum_name_p)), dict_size(dict_size_p) {
1331
+ }
1332
+
1333
+ const EnumDictType &GetEnumDictType() {
1334
+ return dict_type;
1335
+ };
1336
+ const string &GetEnumName() {
1337
+ return enum_name;
1338
+ };
1339
+ const string GetSchemaName() const {
1340
+ return catalog_entry ? catalog_entry->schema->name : "";
1341
+ };
1342
+ const Vector &GetValuesInsertOrder() {
1343
+ return values_insert_order;
1344
+ };
1345
+ const idx_t &GetDictSize() {
1346
+ return dict_size;
1347
+ };
1348
+ EnumTypeInfo(const EnumTypeInfo &) = delete;
1349
+ EnumTypeInfo &operator=(const EnumTypeInfo &) = delete;
1336
1350
 
1337
1351
  protected:
1338
1352
  // Equalities are only used in enums with different catalog entries
@@ -1362,12 +1376,39 @@ protected:
1362
1376
  if (dict_type != EnumDictType::VECTOR_DICT) {
1363
1377
  throw InternalException("Cannot serialize non-vector dictionary ENUM types");
1364
1378
  }
1365
- writer.WriteField<uint32_t>(dict_size);
1366
- writer.WriteString(enum_name);
1367
- ((Vector &)values_insert_order).Serialize(dict_size, writer.GetSerializer());
1379
+ bool serialize_internals = GetSchemaName().empty() || writer.GetSerializer().is_query_plan;
1380
+ EnumType::Serialize(writer, *this, serialize_internals);
1368
1381
  }
1382
+
1383
+ Vector values_insert_order;
1384
+
1385
+ private:
1386
+ EnumDictType dict_type;
1387
+ string enum_name;
1388
+ idx_t dict_size;
1369
1389
  };
1370
1390
 
1391
+ // If this type is primarily stored in the catalog or not. Enums from Pandas/Factors are not in the catalog.
1392
+
1393
+ void EnumType::Serialize(FieldWriter &writer, const ExtraTypeInfo &type_info, bool serialize_internals) {
1394
+ D_ASSERT(type_info.type == ExtraTypeInfoType::ENUM_TYPE_INFO);
1395
+ auto &enum_info = (EnumTypeInfo &)type_info;
1396
+ // Store Schema Name
1397
+ writer.WriteString(enum_info.GetSchemaName());
1398
+ // Store Enum Name
1399
+ writer.WriteString(enum_info.GetEnumName());
1400
+ // Store If we are serializing the internals
1401
+ writer.WriteField<bool>(serialize_internals);
1402
+ if (serialize_internals) {
1403
+ // We must serialize the internals
1404
+ auto dict_size = enum_info.GetDictSize();
1405
+ // Store Dictionary Size
1406
+ writer.WriteField<uint32_t>(dict_size);
1407
+ // Store Vector Order By Insertion
1408
+ ((Vector &)enum_info.GetValuesInsertOrder()).Serialize(dict_size, writer.GetSerializer());
1409
+ }
1410
+ }
1411
+
1371
1412
  template <class T>
1372
1413
  struct EnumTypeInfoTemplated : public EnumTypeInfo {
1373
1414
  explicit EnumTypeInfoTemplated(const string &enum_name_p, Vector &values_insert_order_p, idx_t size_p)
@@ -1391,13 +1432,21 @@ struct EnumTypeInfoTemplated : public EnumTypeInfo {
1391
1432
  }
1392
1433
  }
1393
1434
 
1394
- static shared_ptr<EnumTypeInfoTemplated> Deserialize(FieldReader &reader, uint32_t size) {
1395
- auto enum_name = reader.ReadRequired<string>();
1435
+ static shared_ptr<EnumTypeInfoTemplated> Deserialize(FieldReader &reader, uint32_t size, string enum_name) {
1436
+
1396
1437
  Vector values_insert_order(LogicalType::VARCHAR, size);
1397
1438
  values_insert_order.Deserialize(size, reader.GetSource());
1398
1439
  return make_shared<EnumTypeInfoTemplated>(std::move(enum_name), values_insert_order, size);
1399
1440
  }
1400
1441
 
1442
+ string_map_t<T> &GetValues() {
1443
+ return values;
1444
+ }
1445
+
1446
+ EnumTypeInfoTemplated(const EnumTypeInfoTemplated &) = delete;
1447
+ EnumTypeInfoTemplated &operator=(const EnumTypeInfoTemplated &) = delete;
1448
+
1449
+ private:
1401
1450
  string_map_t<T> values;
1402
1451
  };
1403
1452
 
@@ -1405,7 +1454,7 @@ const string &EnumType::GetTypeName(const LogicalType &type) {
1405
1454
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1406
1455
  auto info = type.AuxInfo();
1407
1456
  D_ASSERT(info);
1408
- return ((EnumTypeInfo &)*info).enum_name;
1457
+ return ((EnumTypeInfo &)*info).GetEnumName();
1409
1458
  }
1410
1459
 
1411
1460
  static PhysicalType EnumVectorDictType(idx_t size) {
@@ -1454,11 +1503,11 @@ int64_t EnumType::GetPos(const LogicalType &type, const string_t &key) {
1454
1503
  auto info = type.AuxInfo();
1455
1504
  switch (type.InternalType()) {
1456
1505
  case PhysicalType::UINT8:
1457
- return TemplatedGetPos(((EnumTypeInfoTemplated<uint8_t> &)*info).values, key);
1506
+ return TemplatedGetPos(((EnumTypeInfoTemplated<uint8_t> &)*info).GetValues(), key);
1458
1507
  case PhysicalType::UINT16:
1459
- return TemplatedGetPos(((EnumTypeInfoTemplated<uint16_t> &)*info).values, key);
1508
+ return TemplatedGetPos(((EnumTypeInfoTemplated<uint16_t> &)*info).GetValues(), key);
1460
1509
  case PhysicalType::UINT32:
1461
- return TemplatedGetPos(((EnumTypeInfoTemplated<uint32_t> &)*info).values, key);
1510
+ return TemplatedGetPos(((EnumTypeInfoTemplated<uint32_t> &)*info).GetValues(), key);
1462
1511
  default:
1463
1512
  throw InternalException("ENUM can only have unsigned integers (except UINT64) as physical types");
1464
1513
  }
@@ -1466,22 +1515,22 @@ int64_t EnumType::GetPos(const LogicalType &type, const string_t &key) {
1466
1515
 
1467
1516
  const string EnumType::GetValue(const Value &val) {
1468
1517
  auto info = val.type().AuxInfo();
1469
- auto &values_insert_order = ((EnumTypeInfo &)*info).values_insert_order;
1518
+ auto &values_insert_order = ((EnumTypeInfo &)*info).GetValuesInsertOrder();
1470
1519
  return StringValue::Get(values_insert_order.GetValue(val.GetValue<uint32_t>()));
1471
1520
  }
1472
1521
 
1473
- Vector &EnumType::GetValuesInsertOrder(const LogicalType &type) {
1522
+ const Vector &EnumType::GetValuesInsertOrder(const LogicalType &type) {
1474
1523
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1475
1524
  auto info = type.AuxInfo();
1476
1525
  D_ASSERT(info);
1477
- return ((EnumTypeInfo &)*info).values_insert_order;
1526
+ return ((EnumTypeInfo &)*info).GetValuesInsertOrder();
1478
1527
  }
1479
1528
 
1480
1529
  idx_t EnumType::GetSize(const LogicalType &type) {
1481
1530
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1482
1531
  auto info = type.AuxInfo();
1483
1532
  D_ASSERT(info);
1484
- return ((EnumTypeInfo &)*info).dict_size;
1533
+ return ((EnumTypeInfo &)*info).GetDictSize();
1485
1534
  }
1486
1535
 
1487
1536
  void EnumType::SetCatalog(LogicalType &type, TypeCatalogEntry *catalog_entry) {
@@ -1497,13 +1546,18 @@ TypeCatalogEntry *EnumType::GetCatalog(const LogicalType &type) {
1497
1546
  return ((EnumTypeInfo &)*info).catalog_entry;
1498
1547
  }
1499
1548
 
1549
+ string EnumType::GetSchemaName(const LogicalType &type) {
1550
+ auto catalog_entry = EnumType::GetCatalog(type);
1551
+ return catalog_entry ? catalog_entry->schema->name : "";
1552
+ }
1553
+
1500
1554
  PhysicalType EnumType::GetPhysicalType(const LogicalType &type) {
1501
1555
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1502
1556
  auto aux_info = type.AuxInfo();
1503
1557
  D_ASSERT(aux_info);
1504
1558
  auto &info = (EnumTypeInfo &)*aux_info;
1505
- D_ASSERT(info.dict_type == EnumDictType::VECTOR_DICT);
1506
- return EnumVectorDictType(info.dict_size);
1559
+ D_ASSERT(info.GetEnumDictType() == EnumDictType::VECTOR_DICT);
1560
+ return EnumVectorDictType(info.GetDictSize());
1507
1561
  }
1508
1562
 
1509
1563
  //===--------------------------------------------------------------------===//
@@ -1549,20 +1603,40 @@ shared_ptr<ExtraTypeInfo> ExtraTypeInfo::Deserialize(FieldReader &reader) {
1549
1603
  extra_info = UserTypeInfo::Deserialize(reader);
1550
1604
  break;
1551
1605
  case ExtraTypeInfoType::ENUM_TYPE_INFO: {
1552
- auto enum_size = reader.ReadRequired<uint32_t>();
1553
- auto enum_internal_type = EnumVectorDictType(enum_size);
1554
- switch (enum_internal_type) {
1555
- case PhysicalType::UINT8:
1556
- extra_info = EnumTypeInfoTemplated<uint8_t>::Deserialize(reader, enum_size);
1557
- break;
1558
- case PhysicalType::UINT16:
1559
- extra_info = EnumTypeInfoTemplated<uint16_t>::Deserialize(reader, enum_size);
1560
- break;
1561
- case PhysicalType::UINT32:
1562
- extra_info = EnumTypeInfoTemplated<uint32_t>::Deserialize(reader, enum_size);
1606
+ auto schema_name = reader.ReadRequired<string>();
1607
+ auto enum_name = reader.ReadRequired<string>();
1608
+ auto deserialize_internals = reader.ReadRequired<bool>();
1609
+ if (!deserialize_internals) {
1610
+ // this means the enum should already be in the catalog.
1611
+ auto &client_context = reader.GetSource().GetContext();
1612
+ // See if the serializer has a catalog
1613
+ auto catalog = reader.GetSource().GetCatalog();
1614
+ if (catalog) {
1615
+ auto enum_type = catalog->GetType(client_context, schema_name, enum_name, true);
1616
+ if (enum_type != LogicalType::INVALID) {
1617
+ extra_info = enum_type.GetAuxInfoShrPtr();
1618
+ }
1619
+ }
1620
+ if (!extra_info) {
1621
+ throw InternalException("Could not find ENUM in the Catalog to deserialize");
1622
+ }
1563
1623
  break;
1564
- default:
1565
- throw InternalException("Invalid Physical Type for ENUMs");
1624
+ } else {
1625
+ auto enum_size = reader.ReadRequired<uint32_t>();
1626
+ auto enum_internal_type = EnumVectorDictType(enum_size);
1627
+ switch (enum_internal_type) {
1628
+ case PhysicalType::UINT8:
1629
+ extra_info = EnumTypeInfoTemplated<uint8_t>::Deserialize(reader, enum_size, enum_name);
1630
+ break;
1631
+ case PhysicalType::UINT16:
1632
+ extra_info = EnumTypeInfoTemplated<uint16_t>::Deserialize(reader, enum_size, enum_name);
1633
+ break;
1634
+ case PhysicalType::UINT32:
1635
+ extra_info = EnumTypeInfoTemplated<uint32_t>::Deserialize(reader, enum_size, enum_name);
1636
+ break;
1637
+ default:
1638
+ throw InternalException("Invalid Physical Type for ENUMs");
1639
+ }
1566
1640
  }
1567
1641
  } break;
1568
1642
  case ExtraTypeInfoType::AGGREGATE_STATE_TYPE_INFO:
@@ -1592,6 +1666,15 @@ void LogicalType::Serialize(Serializer &serializer) const {
1592
1666
  writer.Finalize();
1593
1667
  }
1594
1668
 
1669
+ void LogicalType::SerializeEnumType(Serializer &serializer) const {
1670
+ FieldWriter writer(serializer);
1671
+ writer.WriteField<LogicalTypeId>(id_);
1672
+ writer.WriteField<ExtraTypeInfoType>(type_info_->type);
1673
+ EnumType::Serialize(writer, *type_info_, true);
1674
+ writer.WriteString(type_info_->alias);
1675
+ writer.Finalize();
1676
+ }
1677
+
1595
1678
  LogicalType LogicalType::Deserialize(Deserializer &source) {
1596
1679
  FieldReader reader(source);
1597
1680
  auto id = reader.ReadRequired<LogicalTypeId>();
@@ -1289,15 +1289,24 @@ void WindowExecutor::Evaluate(idx_t row_idx, DataChunk &input_chunk, Vector &res
1289
1289
  break;
1290
1290
  }
1291
1291
  case ExpressionType::WINDOW_FIRST_VALUE: {
1292
+ // Same as NTH_VALUE(..., 1)
1292
1293
  idx_t n = 1;
1293
1294
  const auto first_idx = FindNextStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1294
- CopyCell(payload_collection, 0, first_idx, result, output_offset);
1295
+ if (!n) {
1296
+ CopyCell(payload_collection, 0, first_idx, result, output_offset);
1297
+ } else {
1298
+ FlatVector::SetNull(result, output_offset, true);
1299
+ }
1295
1300
  break;
1296
1301
  }
1297
1302
  case ExpressionType::WINDOW_LAST_VALUE: {
1298
1303
  idx_t n = 1;
1299
- CopyCell(payload_collection, 0, FindPrevStart(ignore_nulls, bounds.window_start, bounds.window_end, n),
1300
- result, output_offset);
1304
+ const auto last_idx = FindPrevStart(ignore_nulls, bounds.window_start, bounds.window_end, n);
1305
+ if (!n) {
1306
+ CopyCell(payload_collection, 0, last_idx, result, output_offset);
1307
+ } else {
1308
+ FlatVector::SetNull(result, output_offset, true);
1309
+ }
1301
1310
  break;
1302
1311
  }
1303
1312
  case ExpressionType::WINDOW_NTH_VALUE: {
@@ -386,7 +386,7 @@ IEJoinUnion::IEJoinUnion(ClientContext &context, const PhysicalIEJoin &op, Sorte
386
386
  // Sort on the first expression
387
387
  auto ref = make_unique<BoundReferenceExpression>(order1.expression->return_type, 0);
388
388
  vector<BoundOrderByNode> orders;
389
- orders.emplace_back(BoundOrderByNode(order1.type, order1.null_order, std::move(ref)));
389
+ orders.emplace_back(order1.type, order1.null_order, std::move(ref));
390
390
 
391
391
  l1 = make_unique<SortedTable>(context, orders, payload_layout);
392
392
 
@@ -422,7 +422,7 @@ IEJoinUnion::IEJoinUnion(ClientContext &context, const PhysicalIEJoin &op, Sorte
422
422
  // Sort on the first expression
423
423
  orders.clear();
424
424
  ref = make_unique<BoundReferenceExpression>(order2.expression->return_type, 0);
425
- orders.emplace_back(BoundOrderByNode(order2.type, order2.null_order, std::move(ref)));
425
+ orders.emplace_back(order2.type, order2.null_order, std::move(ref));
426
426
 
427
427
  ExpressionExecutor executor(context);
428
428
  executor.AddExpression(*orders[0].expression);
@@ -30,25 +30,20 @@ PhysicalPiecewiseMergeJoin::PhysicalPiecewiseMergeJoin(LogicalOperator &op, uniq
30
30
  switch (cond.comparison) {
31
31
  case ExpressionType::COMPARE_LESSTHAN:
32
32
  case ExpressionType::COMPARE_LESSTHANOREQUALTO:
33
- lhs_orders.emplace_back(
34
- BoundOrderByNode(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(left)));
35
- rhs_orders.emplace_back(
36
- BoundOrderByNode(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(right)));
33
+ lhs_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(left));
34
+ rhs_orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_LAST, std::move(right));
37
35
  break;
38
36
  case ExpressionType::COMPARE_GREATERTHAN:
39
37
  case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
40
- lhs_orders.emplace_back(
41
- BoundOrderByNode(OrderType::DESCENDING, OrderByNullType::NULLS_LAST, std::move(left)));
42
- rhs_orders.emplace_back(
43
- BoundOrderByNode(OrderType::DESCENDING, OrderByNullType::NULLS_LAST, std::move(right)));
38
+ lhs_orders.emplace_back(OrderType::DESCENDING, OrderByNullType::NULLS_LAST, std::move(left));
39
+ rhs_orders.emplace_back(OrderType::DESCENDING, OrderByNullType::NULLS_LAST, std::move(right));
44
40
  break;
45
41
  case ExpressionType::COMPARE_NOTEQUAL:
46
42
  case ExpressionType::COMPARE_DISTINCT_FROM:
47
43
  // Allowed in multi-predicate joins, but can't be first/sort.
48
44
  D_ASSERT(!lhs_orders.empty());
49
- lhs_orders.emplace_back(BoundOrderByNode(OrderType::INVALID, OrderByNullType::NULLS_LAST, std::move(left)));
50
- rhs_orders.emplace_back(
51
- BoundOrderByNode(OrderType::INVALID, OrderByNullType::NULLS_LAST, std::move(right)));
45
+ lhs_orders.emplace_back(OrderType::INVALID, OrderByNullType::NULLS_LAST, std::move(left));
46
+ rhs_orders.emplace_back(OrderType::INVALID, OrderByNullType::NULLS_LAST, std::move(right));
52
47
  break;
53
48
 
54
49
  default:
@@ -46,7 +46,7 @@ void PhysicalRangeJoin::LocalSortedTable::Sink(DataChunk &input, GlobalSortState
46
46
 
47
47
  // Only sort the primary key
48
48
  DataChunk join_head;
49
- join_head.data.emplace_back(Vector(keys.data[0]));
49
+ join_head.data.emplace_back(keys.data[0]);
50
50
  join_head.SetCardinality(keys.size());
51
51
 
52
52
  // Sink the data into the local sort state
@@ -85,8 +85,8 @@ static string CreateDirRecursive(const vector<idx_t> &cols, const vector<string>
85
85
  CreateDir(path, fs);
86
86
 
87
87
  for (idx_t i = 0; i < cols.size(); i++) {
88
- auto partition_col_name = names[cols[i]];
89
- auto partition_value = values[i];
88
+ const auto &partition_col_name = names[cols[i]];
89
+ const auto &partition_value = values[i];
90
90
  string p_dir = partition_col_name + "=" + partition_value.ToString();
91
91
  path = fs.JoinPath(path, p_dir);
92
92
  CreateDir(path, fs);
@@ -15,10 +15,11 @@ PhysicalCreateType::PhysicalCreateType(unique_ptr<CreateTypeInfo> info, idx_t es
15
15
  //===--------------------------------------------------------------------===//
16
16
  class CreateTypeGlobalState : public GlobalSinkState {
17
17
  public:
18
- explicit CreateTypeGlobalState(ClientContext &context) : collection(context, {LogicalType::VARCHAR}) {
18
+ explicit CreateTypeGlobalState(ClientContext &context) : result(LogicalType::VARCHAR) {
19
19
  }
20
-
21
- ColumnDataCollection collection;
20
+ Vector result;
21
+ idx_t size = 0;
22
+ idx_t capacity = STANDARD_VECTOR_SIZE;
22
23
  };
23
24
 
24
25
  unique_ptr<GlobalSinkState> PhysicalCreateType::GetGlobalSinkState(ClientContext &context) const {
@@ -28,7 +29,7 @@ unique_ptr<GlobalSinkState> PhysicalCreateType::GetGlobalSinkState(ClientContext
28
29
  SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
29
30
  DataChunk &input) const {
30
31
  auto &gstate = (CreateTypeGlobalState &)gstate_p;
31
- idx_t total_row_count = gstate.collection.Count() + input.size();
32
+ idx_t total_row_count = gstate.size + input.size();
32
33
  if (total_row_count > NumericLimits<uint32_t>::Maximum()) {
33
34
  throw InvalidInputException("Attempted to create ENUM of size %llu, which exceeds the maximum size of %llu",
34
35
  total_row_count, NumericLimits<uint32_t>::Maximum());
@@ -36,15 +37,23 @@ SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, GlobalSinkSta
36
37
  UnifiedVectorFormat sdata;
37
38
  input.data[0].ToUnifiedFormat(input.size(), sdata);
38
39
 
40
+ if (total_row_count > gstate.capacity) {
41
+ // We must resize our result vector
42
+ gstate.result.Resize(gstate.capacity, gstate.capacity * 2);
43
+ gstate.capacity *= 2;
44
+ }
45
+
46
+ auto src_ptr = (string_t *)sdata.data;
47
+ auto result_ptr = FlatVector::GetData<string_t>(gstate.result);
39
48
  // Input vector has NULL value, we just throw an exception
40
49
  for (idx_t i = 0; i < input.size(); i++) {
41
50
  idx_t idx = sdata.sel->get_index(i);
42
51
  if (!sdata.validity.RowIsValid(idx)) {
43
52
  throw InvalidInputException("Attempted to create ENUM type with NULL value!");
44
53
  }
54
+ result_ptr[gstate.size++] =
55
+ StringVector::AddStringOrBlob(gstate.result, src_ptr[idx].GetDataUnsafe(), src_ptr[idx].GetSize());
45
56
  }
46
-
47
- gstate.collection.Append(input);
48
57
  return SinkResultType::NEED_MORE_INPUT;
49
58
  }
50
59
 
@@ -72,44 +81,15 @@ void PhysicalCreateType::GetData(ExecutionContext &context, DataChunk &chunk, Gl
72
81
 
73
82
  if (IsSink()) {
74
83
  D_ASSERT(info->type == LogicalType::INVALID);
75
-
76
84
  auto &g_sink_state = (CreateTypeGlobalState &)*sink_state;
77
- auto &collection = g_sink_state.collection;
78
-
79
- idx_t total_row_count = collection.Count();
80
-
81
- ColumnDataScanState scan_state;
82
- collection.InitializeScan(scan_state);
83
-
84
- DataChunk scan_chunk;
85
- collection.InitializeScanChunk(scan_chunk);
86
-
87
- Vector result(LogicalType::VARCHAR, total_row_count);
88
- auto result_ptr = FlatVector::GetData<string_t>(result);
89
-
90
- idx_t offset = 0;
91
- while (collection.Scan(scan_state, scan_chunk)) {
92
- idx_t src_row_count = scan_chunk.size();
93
- auto &src_vec = scan_chunk.data[0];
94
- D_ASSERT(src_vec.GetVectorType() == VectorType::FLAT_VECTOR);
95
- D_ASSERT(src_vec.GetType().id() == LogicalType::VARCHAR);
96
-
97
- auto src_ptr = FlatVector::GetData<string_t>(src_vec);
98
-
99
- for (idx_t i = 0; i < src_row_count; i++) {
100
- idx_t target_index = offset + i;
101
- result_ptr[target_index] =
102
- StringVector::AddStringOrBlob(result, src_ptr[i].GetDataUnsafe(), src_ptr[i].GetSize());
103
- }
104
-
105
- offset += src_row_count;
106
- }
107
-
108
- info->type = LogicalType::ENUM(info->name, result, total_row_count);
85
+ info->type = LogicalType::ENUM(info->name, g_sink_state.result, g_sink_state.size);
109
86
  }
110
87
 
111
88
  auto &catalog = Catalog::GetCatalog(context.client, info->catalog);
112
- catalog.CreateType(context.client, info.get());
89
+ auto catalog_entry = catalog.CreateType(context.client, info.get());
90
+ D_ASSERT(catalog_entry->type == CatalogType::TYPE_ENTRY);
91
+ auto catalog_type = (TypeCatalogEntry *)catalog_entry;
92
+ LogicalType::SetCatalog(info->type, catalog_type);
113
93
  state.finished = true;
114
94
  }
115
95
 
@@ -414,7 +414,7 @@ struct QuantileBindData : public FunctionData {
414
414
  size_t pos = 0;
415
415
  size_t neg = 0;
416
416
  for (idx_t i = 0; i < quantiles_p.size(); ++i) {
417
- const auto q = quantiles_p[i];
417
+ const auto &q = quantiles_p[i];
418
418
  pos += (q > 0);
419
419
  neg += (q < 0);
420
420
  quantiles.emplace_back(QuantileAbs(q));
@@ -723,9 +723,9 @@ static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment,
723
723
  read_data_from_segment.segment_function = ReadDataFromVarcharSegment;
724
724
  copy_data_from_segment.segment_function = CopyDataFromListSegment;
725
725
 
726
- write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
726
+ write_data_to_segment.child_functions.emplace_back();
727
727
  write_data_to_segment.child_functions.back().create_segment = CreatePrimitiveSegment<char>;
728
- copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
728
+ copy_data_from_segment.child_functions.emplace_back();
729
729
  copy_data_from_segment.child_functions.back().segment_function = CopyDataFromPrimitiveSegment<char>;
730
730
  break;
731
731
  }
@@ -736,9 +736,9 @@ static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment,
736
736
  copy_data_from_segment.segment_function = CopyDataFromListSegment;
737
737
 
738
738
  // recurse
739
- write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
740
- read_data_from_segment.child_functions.emplace_back(ReadDataFromSegment());
741
- copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
739
+ write_data_to_segment.child_functions.emplace_back();
740
+ read_data_from_segment.child_functions.emplace_back();
741
+ copy_data_from_segment.child_functions.emplace_back();
742
742
  GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
743
743
  read_data_from_segment.child_functions.back(),
744
744
  copy_data_from_segment.child_functions.back(), ListType::GetChildType(type));
@@ -753,9 +753,9 @@ static void GetSegmentDataFunctions(WriteDataToSegment &write_data_to_segment,
753
753
  // recurse
754
754
  auto child_types = StructType::GetChildTypes(type);
755
755
  for (idx_t i = 0; i < child_types.size(); i++) {
756
- write_data_to_segment.child_functions.emplace_back(WriteDataToSegment());
757
- read_data_from_segment.child_functions.emplace_back(ReadDataFromSegment());
758
- copy_data_from_segment.child_functions.emplace_back(CopyDataFromSegment());
756
+ write_data_to_segment.child_functions.emplace_back();
757
+ read_data_from_segment.child_functions.emplace_back();
758
+ copy_data_from_segment.child_functions.emplace_back();
759
759
  GetSegmentDataFunctions(write_data_to_segment.child_functions.back(),
760
760
  read_data_from_segment.child_functions.back(),
761
761
  copy_data_from_segment.child_functions.back(), child_types[i].second);
@@ -129,7 +129,7 @@ BoundCastInfo DefaultCasts::StructCastSwitch(BindCastInput &input, const Logical
129
129
  for (auto &child_entry : struct_children) {
130
130
  varchar_children.push_back(make_pair(child_entry.first, LogicalType::VARCHAR));
131
131
  }
132
- auto varchar_type = LogicalType::STRUCT(std::move(varchar_children));
132
+ auto varchar_type = LogicalType::STRUCT(varchar_children);
133
133
  return BoundCastInfo(StructToVarcharCast,
134
134
  StructBoundCastData::BindStructToStructCast(input, source, varchar_type));
135
135
  }