duckdb 0.7.2-dev402.0 → 0.7.2-dev586.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105):
  1. package/binding.gyp +9 -9
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +0 -1
  4. package/src/duckdb/extension/parquet/parquet-extension.cpp +1 -0
  5. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +8 -6
  6. package/src/duckdb/src/catalog/catalog.cpp +13 -0
  7. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -21
  8. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +8 -2
  9. package/src/duckdb/src/catalog/catalog_set.cpp +1 -0
  10. package/src/duckdb/src/common/arrow/arrow_appender.cpp +48 -4
  11. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  12. package/src/duckdb/src/common/field_writer.cpp +1 -0
  13. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +4 -0
  14. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +15 -2
  15. package/src/duckdb/src/common/types.cpp +136 -53
  16. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +20 -40
  17. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +9 -1
  18. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +5 -8
  19. package/src/duckdb/src/function/aggregate/distributive/bool.cpp +2 -0
  20. package/src/duckdb/src/function/aggregate/distributive/count.cpp +1 -0
  21. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -0
  22. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +8 -0
  23. package/src/duckdb/src/function/aggregate/holistic/quantile.cpp +15 -0
  24. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +42 -11
  25. package/src/duckdb/src/function/function_binder.cpp +1 -8
  26. package/src/duckdb/src/function/scalar/date/current.cpp +0 -2
  27. package/src/duckdb/src/function/table/arrow.cpp +3 -0
  28. package/src/duckdb/src/function/table/arrow_conversion.cpp +18 -0
  29. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  30. package/src/duckdb/src/function/table_function.cpp +11 -11
  31. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +3 -0
  32. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
  33. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +12 -4
  34. package/src/duckdb/src/include/duckdb/common/{http_stats.hpp → http_state.hpp} +18 -4
  35. package/src/duckdb/src/include/duckdb/common/serializer/buffered_deserializer.hpp +4 -2
  36. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +8 -2
  37. package/src/duckdb/src/include/duckdb/common/serializer.hpp +13 -0
  38. package/src/duckdb/src/include/duckdb/common/types.hpp +27 -1
  39. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +1 -0
  40. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -3
  41. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +6 -3
  42. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -6
  43. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +12 -1
  44. package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
  45. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -3
  46. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -0
  47. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/optimizer/rule/list.hpp +1 -0
  49. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +24 -0
  50. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
  51. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +2 -2
  52. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +3 -0
  53. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +6 -0
  54. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -0
  55. package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -3
  56. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +3 -0
  57. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  58. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -1
  59. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +3 -0
  60. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +7 -0
  62. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +1 -1
  63. package/src/duckdb/src/main/client_context.cpp +30 -32
  64. package/src/duckdb/src/main/client_data.cpp +7 -6
  65. package/src/duckdb/src/main/database.cpp +9 -0
  66. package/src/duckdb/src/main/query_profiler.cpp +17 -15
  67. package/src/duckdb/src/optimizer/optimizer.cpp +1 -0
  68. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +30 -0
  69. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +9 -2
  70. package/src/duckdb/src/parser/expression/star_expression.cpp +6 -6
  71. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -1
  72. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +17 -2
  73. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +45 -40
  74. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +7 -0
  75. package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +0 -7
  76. package/src/duckdb/src/parser/transform/statement/transform_rename.cpp +3 -4
  77. package/src/duckdb/src/planner/bind_context.cpp +2 -25
  78. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +6 -4
  79. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +3 -2
  80. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +176 -0
  81. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +57 -82
  82. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +11 -0
  83. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  84. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +2 -2
  85. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +1 -1
  86. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +11 -1
  87. package/src/duckdb/src/planner/binder.cpp +12 -23
  88. package/src/duckdb/src/planner/bound_result_modifier.cpp +26 -0
  89. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +9 -2
  90. package/src/duckdb/src/planner/expression_iterator.cpp +5 -0
  91. package/src/duckdb/src/planner/logical_operator.cpp +4 -2
  92. package/src/duckdb/src/planner/logical_operator_visitor.cpp +5 -0
  93. package/src/duckdb/src/planner/operator/logical_distinct.cpp +3 -0
  94. package/src/duckdb/src/planner/planner.cpp +2 -1
  95. package/src/duckdb/src/storage/checkpoint_manager.cpp +8 -3
  96. package/src/duckdb/src/storage/meta_block_reader.cpp +22 -0
  97. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  98. package/src/duckdb/src/storage/wal_replay.cpp +8 -5
  99. package/src/duckdb/src/storage/write_ahead_log.cpp +2 -2
  100. package/src/duckdb/src/transaction/commit_state.cpp +11 -7
  101. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -1
  102. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +8145 -8317
  103. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  104. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  105. package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
@@ -1,7 +1,9 @@
1
1
  #include "duckdb/common/types.hpp"
2
2
 
3
3
  #include "duckdb/catalog/catalog.hpp"
4
+ #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
4
5
  #include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
6
+ #include "duckdb/catalog/catalog_search_path.hpp"
5
7
  #include "duckdb/catalog/default/default_types.hpp"
6
8
  #include "duckdb/common/exception.hpp"
7
9
  #include "duckdb/common/field_writer.hpp"
@@ -17,6 +19,11 @@
17
19
  #include "duckdb/common/types/vector.hpp"
18
20
  #include "duckdb/common/unordered_map.hpp"
19
21
  #include "duckdb/function/cast_rules.hpp"
22
+ #include "duckdb/main/attached_database.hpp"
23
+ #include "duckdb/main/client_context.hpp"
24
+ #include "duckdb/main/client_data.hpp"
25
+ #include "duckdb/main/database.hpp"
26
+ #include "duckdb/main/database_manager.hpp"
20
27
  #include "duckdb/parser/keyword_helper.hpp"
21
28
  #include "duckdb/parser/parser.hpp"
22
29
 
@@ -785,7 +792,7 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
785
792
  child_list_t<LogicalType> child_types;
786
793
  for (idx_t i = 0; i < left_child_types.size(); i++) {
787
794
  auto child_type = MaxLogicalType(left_child_types[i].second, right_child_types[i].second);
788
- child_types.push_back(make_pair(left_child_types[i].first, std::move(child_type)));
795
+ child_types.emplace_back(left_child_types[i].first, std::move(child_type));
789
796
  }
790
797
 
791
798
  return LogicalType::STRUCT(std::move(child_types));
@@ -797,7 +804,7 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
797
804
  // return the "larger" type, with the most members
798
805
  return left_member_count > right_member_count ? left : right;
799
806
  }
800
- // otherwise, keep left, dont try to meld the two together.
807
+ // otherwise, keep left, don't try to meld the two together.
801
808
  return left;
802
809
  }
803
810
  // types are equal but no extra specifier: just return the type
@@ -838,17 +845,6 @@ bool ApproxEqual(double ldecimal, double rdecimal) {
838
845
  //===--------------------------------------------------------------------===//
839
846
  // Extra Type Info
840
847
  //===--------------------------------------------------------------------===//
841
- enum class ExtraTypeInfoType : uint8_t {
842
- INVALID_TYPE_INFO = 0,
843
- GENERIC_TYPE_INFO = 1,
844
- DECIMAL_TYPE_INFO = 2,
845
- STRING_TYPE_INFO = 3,
846
- LIST_TYPE_INFO = 4,
847
- STRUCT_TYPE_INFO = 5,
848
- ENUM_TYPE_INFO = 6,
849
- USER_TYPE_INFO = 7,
850
- AGGREGATE_STATE_TYPE_INFO = 8
851
- };
852
848
 
853
849
  struct ExtraTypeInfo {
854
850
  explicit ExtraTypeInfo(ExtraTypeInfoType type) : type(type) {
@@ -941,6 +937,10 @@ TypeCatalogEntry *LogicalType::GetCatalog(const LogicalType &type) {
941
937
  return ((ExtraTypeInfo &)*info).catalog_entry;
942
938
  }
943
939
 
940
+ ExtraTypeInfoType LogicalType::GetExtraTypeInfoType(const ExtraTypeInfo &type) {
941
+ return type.type;
942
+ }
943
+
944
944
  //===--------------------------------------------------------------------===//
945
945
  // Decimal Type
946
946
  //===--------------------------------------------------------------------===//
@@ -1108,7 +1108,7 @@ public:
1108
1108
  for (uint32_t i = 0; i < child_types_size; i++) {
1109
1109
  auto name = source.Read<string>();
1110
1110
  auto type = LogicalType::Deserialize(source);
1111
- child_list.push_back(make_pair(std::move(name), std::move(type)));
1111
+ child_list.emplace_back(std::move(name), std::move(type));
1112
1112
  }
1113
1113
  return make_shared<StructTypeInfo>(std::move(child_list));
1114
1114
  }
@@ -1227,8 +1227,8 @@ LogicalType LogicalType::MAP(LogicalType child) {
1227
1227
 
1228
1228
  LogicalType LogicalType::MAP(LogicalType key, LogicalType value) {
1229
1229
  child_list_t<LogicalType> child_types;
1230
- child_types.push_back({"key", std::move(key)});
1231
- child_types.push_back({"value", std::move(value)});
1230
+ child_types.emplace_back("key", std::move(key));
1231
+ child_types.emplace_back("value", std::move(value));
1232
1232
  return LogicalType::MAP(LogicalType::STRUCT(std::move(child_types)));
1233
1233
  }
1234
1234
 
@@ -1247,7 +1247,7 @@ const LogicalType &MapType::ValueType(const LogicalType &type) {
1247
1247
  //===--------------------------------------------------------------------===//
1248
1248
 
1249
1249
  LogicalType LogicalType::UNION(child_list_t<LogicalType> members) {
1250
- D_ASSERT(members.size() > 0);
1250
+ D_ASSERT(!members.empty());
1251
1251
  D_ASSERT(members.size() <= UnionType::MAX_UNION_MEMBERS);
1252
1252
  // union types always have a hidden "tag" field in front
1253
1253
  members.insert(members.begin(), {"", LogicalType::TINYINT});
@@ -1270,7 +1270,7 @@ const string &UnionType::GetMemberName(const LogicalType &type, idx_t index) {
1270
1270
  }
1271
1271
 
1272
1272
  idx_t UnionType::GetMemberCount(const LogicalType &type) {
1273
- // dont count the "tag" field
1273
+ // don't count the "tag" field
1274
1274
  return StructType::GetChildTypes(type).size() - 1;
1275
1275
  }
1276
1276
  const child_list_t<LogicalType> UnionType::CopyMemberTypes(const LogicalType &type) {
@@ -1326,13 +1326,27 @@ enum EnumDictType : uint8_t { INVALID = 0, VECTOR_DICT = 1 };
1326
1326
 
1327
1327
  struct EnumTypeInfo : public ExtraTypeInfo {
1328
1328
  explicit EnumTypeInfo(string enum_name_p, Vector &values_insert_order_p, idx_t dict_size_p)
1329
- : ExtraTypeInfo(ExtraTypeInfoType::ENUM_TYPE_INFO), dict_type(EnumDictType::VECTOR_DICT),
1330
- enum_name(std::move(enum_name_p)), values_insert_order(values_insert_order_p), dict_size(dict_size_p) {
1331
- }
1332
- EnumDictType dict_type;
1333
- string enum_name;
1334
- Vector values_insert_order;
1335
- idx_t dict_size;
1329
+ : ExtraTypeInfo(ExtraTypeInfoType::ENUM_TYPE_INFO), values_insert_order(values_insert_order_p),
1330
+ dict_type(EnumDictType::VECTOR_DICT), enum_name(std::move(enum_name_p)), dict_size(dict_size_p) {
1331
+ }
1332
+
1333
+ const EnumDictType &GetEnumDictType() {
1334
+ return dict_type;
1335
+ };
1336
+ const string &GetEnumName() {
1337
+ return enum_name;
1338
+ };
1339
+ const string GetSchemaName() const {
1340
+ return catalog_entry ? catalog_entry->schema->name : "";
1341
+ };
1342
+ const Vector &GetValuesInsertOrder() {
1343
+ return values_insert_order;
1344
+ };
1345
+ const idx_t &GetDictSize() {
1346
+ return dict_size;
1347
+ };
1348
+ EnumTypeInfo(const EnumTypeInfo &) = delete;
1349
+ EnumTypeInfo &operator=(const EnumTypeInfo &) = delete;
1336
1350
 
1337
1351
  protected:
1338
1352
  // Equalities are only used in enums with different catalog entries
@@ -1362,12 +1376,39 @@ protected:
1362
1376
  if (dict_type != EnumDictType::VECTOR_DICT) {
1363
1377
  throw InternalException("Cannot serialize non-vector dictionary ENUM types");
1364
1378
  }
1365
- writer.WriteField<uint32_t>(dict_size);
1366
- writer.WriteString(enum_name);
1367
- ((Vector &)values_insert_order).Serialize(dict_size, writer.GetSerializer());
1379
+ bool serialize_internals = GetSchemaName().empty() || writer.GetSerializer().is_query_plan;
1380
+ EnumType::Serialize(writer, *this, serialize_internals);
1368
1381
  }
1382
+
1383
+ Vector values_insert_order;
1384
+
1385
+ private:
1386
+ EnumDictType dict_type;
1387
+ string enum_name;
1388
+ idx_t dict_size;
1369
1389
  };
1370
1390
 
1391
+ // If this type is primarily stored in the catalog or not. Enums from Pandas/Factors are not in the catalog.
1392
+
1393
+ void EnumType::Serialize(FieldWriter &writer, const ExtraTypeInfo &type_info, bool serialize_internals) {
1394
+ D_ASSERT(type_info.type == ExtraTypeInfoType::ENUM_TYPE_INFO);
1395
+ auto &enum_info = (EnumTypeInfo &)type_info;
1396
+ // Store Schema Name
1397
+ writer.WriteString(enum_info.GetSchemaName());
1398
+ // Store Enum Name
1399
+ writer.WriteString(enum_info.GetEnumName());
1400
+ // Store If we are serializing the internals
1401
+ writer.WriteField<bool>(serialize_internals);
1402
+ if (serialize_internals) {
1403
+ // We must serialize the internals
1404
+ auto dict_size = enum_info.GetDictSize();
1405
+ // Store Dictionary Size
1406
+ writer.WriteField<uint32_t>(dict_size);
1407
+ // Store Vector Order By Insertion
1408
+ ((Vector &)enum_info.GetValuesInsertOrder()).Serialize(dict_size, writer.GetSerializer());
1409
+ }
1410
+ }
1411
+
1371
1412
  template <class T>
1372
1413
  struct EnumTypeInfoTemplated : public EnumTypeInfo {
1373
1414
  explicit EnumTypeInfoTemplated(const string &enum_name_p, Vector &values_insert_order_p, idx_t size_p)
@@ -1391,13 +1432,21 @@ struct EnumTypeInfoTemplated : public EnumTypeInfo {
1391
1432
  }
1392
1433
  }
1393
1434
 
1394
- static shared_ptr<EnumTypeInfoTemplated> Deserialize(FieldReader &reader, uint32_t size) {
1395
- auto enum_name = reader.ReadRequired<string>();
1435
+ static shared_ptr<EnumTypeInfoTemplated> Deserialize(FieldReader &reader, uint32_t size, string enum_name) {
1436
+
1396
1437
  Vector values_insert_order(LogicalType::VARCHAR, size);
1397
1438
  values_insert_order.Deserialize(size, reader.GetSource());
1398
1439
  return make_shared<EnumTypeInfoTemplated>(std::move(enum_name), values_insert_order, size);
1399
1440
  }
1400
1441
 
1442
+ string_map_t<T> &GetValues() {
1443
+ return values;
1444
+ }
1445
+
1446
+ EnumTypeInfoTemplated(const EnumTypeInfoTemplated &) = delete;
1447
+ EnumTypeInfoTemplated &operator=(const EnumTypeInfoTemplated &) = delete;
1448
+
1449
+ private:
1401
1450
  string_map_t<T> values;
1402
1451
  };
1403
1452
 
@@ -1405,7 +1454,7 @@ const string &EnumType::GetTypeName(const LogicalType &type) {
1405
1454
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1406
1455
  auto info = type.AuxInfo();
1407
1456
  D_ASSERT(info);
1408
- return ((EnumTypeInfo &)*info).enum_name;
1457
+ return ((EnumTypeInfo &)*info).GetEnumName();
1409
1458
  }
1410
1459
 
1411
1460
  static PhysicalType EnumVectorDictType(idx_t size) {
@@ -1454,11 +1503,11 @@ int64_t EnumType::GetPos(const LogicalType &type, const string_t &key) {
1454
1503
  auto info = type.AuxInfo();
1455
1504
  switch (type.InternalType()) {
1456
1505
  case PhysicalType::UINT8:
1457
- return TemplatedGetPos(((EnumTypeInfoTemplated<uint8_t> &)*info).values, key);
1506
+ return TemplatedGetPos(((EnumTypeInfoTemplated<uint8_t> &)*info).GetValues(), key);
1458
1507
  case PhysicalType::UINT16:
1459
- return TemplatedGetPos(((EnumTypeInfoTemplated<uint16_t> &)*info).values, key);
1508
+ return TemplatedGetPos(((EnumTypeInfoTemplated<uint16_t> &)*info).GetValues(), key);
1460
1509
  case PhysicalType::UINT32:
1461
- return TemplatedGetPos(((EnumTypeInfoTemplated<uint32_t> &)*info).values, key);
1510
+ return TemplatedGetPos(((EnumTypeInfoTemplated<uint32_t> &)*info).GetValues(), key);
1462
1511
  default:
1463
1512
  throw InternalException("ENUM can only have unsigned integers (except UINT64) as physical types");
1464
1513
  }
@@ -1466,22 +1515,22 @@ int64_t EnumType::GetPos(const LogicalType &type, const string_t &key) {
1466
1515
 
1467
1516
  const string EnumType::GetValue(const Value &val) {
1468
1517
  auto info = val.type().AuxInfo();
1469
- auto &values_insert_order = ((EnumTypeInfo &)*info).values_insert_order;
1518
+ auto &values_insert_order = ((EnumTypeInfo &)*info).GetValuesInsertOrder();
1470
1519
  return StringValue::Get(values_insert_order.GetValue(val.GetValue<uint32_t>()));
1471
1520
  }
1472
1521
 
1473
- Vector &EnumType::GetValuesInsertOrder(const LogicalType &type) {
1522
+ const Vector &EnumType::GetValuesInsertOrder(const LogicalType &type) {
1474
1523
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1475
1524
  auto info = type.AuxInfo();
1476
1525
  D_ASSERT(info);
1477
- return ((EnumTypeInfo &)*info).values_insert_order;
1526
+ return ((EnumTypeInfo &)*info).GetValuesInsertOrder();
1478
1527
  }
1479
1528
 
1480
1529
  idx_t EnumType::GetSize(const LogicalType &type) {
1481
1530
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1482
1531
  auto info = type.AuxInfo();
1483
1532
  D_ASSERT(info);
1484
- return ((EnumTypeInfo &)*info).dict_size;
1533
+ return ((EnumTypeInfo &)*info).GetDictSize();
1485
1534
  }
1486
1535
 
1487
1536
  void EnumType::SetCatalog(LogicalType &type, TypeCatalogEntry *catalog_entry) {
@@ -1497,13 +1546,18 @@ TypeCatalogEntry *EnumType::GetCatalog(const LogicalType &type) {
1497
1546
  return ((EnumTypeInfo &)*info).catalog_entry;
1498
1547
  }
1499
1548
 
1549
+ string EnumType::GetSchemaName(const LogicalType &type) {
1550
+ auto catalog_entry = EnumType::GetCatalog(type);
1551
+ return catalog_entry ? catalog_entry->schema->name : "";
1552
+ }
1553
+
1500
1554
  PhysicalType EnumType::GetPhysicalType(const LogicalType &type) {
1501
1555
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1502
1556
  auto aux_info = type.AuxInfo();
1503
1557
  D_ASSERT(aux_info);
1504
1558
  auto &info = (EnumTypeInfo &)*aux_info;
1505
- D_ASSERT(info.dict_type == EnumDictType::VECTOR_DICT);
1506
- return EnumVectorDictType(info.dict_size);
1559
+ D_ASSERT(info.GetEnumDictType() == EnumDictType::VECTOR_DICT);
1560
+ return EnumVectorDictType(info.GetDictSize());
1507
1561
  }
1508
1562
 
1509
1563
  //===--------------------------------------------------------------------===//
@@ -1549,20 +1603,40 @@ shared_ptr<ExtraTypeInfo> ExtraTypeInfo::Deserialize(FieldReader &reader) {
1549
1603
  extra_info = UserTypeInfo::Deserialize(reader);
1550
1604
  break;
1551
1605
  case ExtraTypeInfoType::ENUM_TYPE_INFO: {
1552
- auto enum_size = reader.ReadRequired<uint32_t>();
1553
- auto enum_internal_type = EnumVectorDictType(enum_size);
1554
- switch (enum_internal_type) {
1555
- case PhysicalType::UINT8:
1556
- extra_info = EnumTypeInfoTemplated<uint8_t>::Deserialize(reader, enum_size);
1557
- break;
1558
- case PhysicalType::UINT16:
1559
- extra_info = EnumTypeInfoTemplated<uint16_t>::Deserialize(reader, enum_size);
1560
- break;
1561
- case PhysicalType::UINT32:
1562
- extra_info = EnumTypeInfoTemplated<uint32_t>::Deserialize(reader, enum_size);
1606
+ auto schema_name = reader.ReadRequired<string>();
1607
+ auto enum_name = reader.ReadRequired<string>();
1608
+ auto deserialize_internals = reader.ReadRequired<bool>();
1609
+ if (!deserialize_internals) {
1610
+ // this means the enum should already be in the catalog.
1611
+ auto &client_context = reader.GetSource().GetContext();
1612
+ // See if the serializer has a catalog
1613
+ auto catalog = reader.GetSource().GetCatalog();
1614
+ if (catalog) {
1615
+ auto enum_type = catalog->GetType(client_context, schema_name, enum_name, true);
1616
+ if (enum_type != LogicalType::INVALID) {
1617
+ extra_info = enum_type.GetAuxInfoShrPtr();
1618
+ }
1619
+ }
1620
+ if (!extra_info) {
1621
+ throw InternalException("Could not find ENUM in the Catalog to deserialize");
1622
+ }
1563
1623
  break;
1564
- default:
1565
- throw InternalException("Invalid Physical Type for ENUMs");
1624
+ } else {
1625
+ auto enum_size = reader.ReadRequired<uint32_t>();
1626
+ auto enum_internal_type = EnumVectorDictType(enum_size);
1627
+ switch (enum_internal_type) {
1628
+ case PhysicalType::UINT8:
1629
+ extra_info = EnumTypeInfoTemplated<uint8_t>::Deserialize(reader, enum_size, enum_name);
1630
+ break;
1631
+ case PhysicalType::UINT16:
1632
+ extra_info = EnumTypeInfoTemplated<uint16_t>::Deserialize(reader, enum_size, enum_name);
1633
+ break;
1634
+ case PhysicalType::UINT32:
1635
+ extra_info = EnumTypeInfoTemplated<uint32_t>::Deserialize(reader, enum_size, enum_name);
1636
+ break;
1637
+ default:
1638
+ throw InternalException("Invalid Physical Type for ENUMs");
1639
+ }
1566
1640
  }
1567
1641
  } break;
1568
1642
  case ExtraTypeInfoType::AGGREGATE_STATE_TYPE_INFO:
@@ -1592,6 +1666,15 @@ void LogicalType::Serialize(Serializer &serializer) const {
1592
1666
  writer.Finalize();
1593
1667
  }
1594
1668
 
1669
+ void LogicalType::SerializeEnumType(Serializer &serializer) const {
1670
+ FieldWriter writer(serializer);
1671
+ writer.WriteField<LogicalTypeId>(id_);
1672
+ writer.WriteField<ExtraTypeInfoType>(type_info_->type);
1673
+ EnumType::Serialize(writer, *type_info_, true);
1674
+ writer.WriteString(type_info_->alias);
1675
+ writer.Finalize();
1676
+ }
1677
+
1595
1678
  LogicalType LogicalType::Deserialize(Deserializer &source) {
1596
1679
  FieldReader reader(source);
1597
1680
  auto id = reader.ReadRequired<LogicalTypeId>();
@@ -15,10 +15,11 @@ PhysicalCreateType::PhysicalCreateType(unique_ptr<CreateTypeInfo> info, idx_t es
15
15
  //===--------------------------------------------------------------------===//
16
16
  class CreateTypeGlobalState : public GlobalSinkState {
17
17
  public:
18
- explicit CreateTypeGlobalState(ClientContext &context) : collection(context, {LogicalType::VARCHAR}) {
18
+ explicit CreateTypeGlobalState(ClientContext &context) : result(LogicalType::VARCHAR) {
19
19
  }
20
-
21
- ColumnDataCollection collection;
20
+ Vector result;
21
+ idx_t size = 0;
22
+ idx_t capacity = STANDARD_VECTOR_SIZE;
22
23
  };
23
24
 
24
25
  unique_ptr<GlobalSinkState> PhysicalCreateType::GetGlobalSinkState(ClientContext &context) const {
@@ -28,7 +29,7 @@ unique_ptr<GlobalSinkState> PhysicalCreateType::GetGlobalSinkState(ClientContext
28
29
  SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p,
29
30
  DataChunk &input) const {
30
31
  auto &gstate = (CreateTypeGlobalState &)gstate_p;
31
- idx_t total_row_count = gstate.collection.Count() + input.size();
32
+ idx_t total_row_count = gstate.size + input.size();
32
33
  if (total_row_count > NumericLimits<uint32_t>::Maximum()) {
33
34
  throw InvalidInputException("Attempted to create ENUM of size %llu, which exceeds the maximum size of %llu",
34
35
  total_row_count, NumericLimits<uint32_t>::Maximum());
@@ -36,15 +37,23 @@ SinkResultType PhysicalCreateType::Sink(ExecutionContext &context, GlobalSinkSta
36
37
  UnifiedVectorFormat sdata;
37
38
  input.data[0].ToUnifiedFormat(input.size(), sdata);
38
39
 
40
+ if (total_row_count > gstate.capacity) {
41
+ // We must resize our result vector
42
+ gstate.result.Resize(gstate.capacity, gstate.capacity * 2);
43
+ gstate.capacity *= 2;
44
+ }
45
+
46
+ auto src_ptr = (string_t *)sdata.data;
47
+ auto result_ptr = FlatVector::GetData<string_t>(gstate.result);
39
48
  // Input vector has NULL value, we just throw an exception
40
49
  for (idx_t i = 0; i < input.size(); i++) {
41
50
  idx_t idx = sdata.sel->get_index(i);
42
51
  if (!sdata.validity.RowIsValid(idx)) {
43
52
  throw InvalidInputException("Attempted to create ENUM type with NULL value!");
44
53
  }
54
+ result_ptr[gstate.size++] =
55
+ StringVector::AddStringOrBlob(gstate.result, src_ptr[idx].GetDataUnsafe(), src_ptr[idx].GetSize());
45
56
  }
46
-
47
- gstate.collection.Append(input);
48
57
  return SinkResultType::NEED_MORE_INPUT;
49
58
  }
50
59
 
@@ -72,44 +81,15 @@ void PhysicalCreateType::GetData(ExecutionContext &context, DataChunk &chunk, Gl
72
81
 
73
82
  if (IsSink()) {
74
83
  D_ASSERT(info->type == LogicalType::INVALID);
75
-
76
84
  auto &g_sink_state = (CreateTypeGlobalState &)*sink_state;
77
- auto &collection = g_sink_state.collection;
78
-
79
- idx_t total_row_count = collection.Count();
80
-
81
- ColumnDataScanState scan_state;
82
- collection.InitializeScan(scan_state);
83
-
84
- DataChunk scan_chunk;
85
- collection.InitializeScanChunk(scan_chunk);
86
-
87
- Vector result(LogicalType::VARCHAR, total_row_count);
88
- auto result_ptr = FlatVector::GetData<string_t>(result);
89
-
90
- idx_t offset = 0;
91
- while (collection.Scan(scan_state, scan_chunk)) {
92
- idx_t src_row_count = scan_chunk.size();
93
- auto &src_vec = scan_chunk.data[0];
94
- D_ASSERT(src_vec.GetVectorType() == VectorType::FLAT_VECTOR);
95
- D_ASSERT(src_vec.GetType().id() == LogicalType::VARCHAR);
96
-
97
- auto src_ptr = FlatVector::GetData<string_t>(src_vec);
98
-
99
- for (idx_t i = 0; i < src_row_count; i++) {
100
- idx_t target_index = offset + i;
101
- result_ptr[target_index] =
102
- StringVector::AddStringOrBlob(result, src_ptr[i].GetDataUnsafe(), src_ptr[i].GetSize());
103
- }
104
-
105
- offset += src_row_count;
106
- }
107
-
108
- info->type = LogicalType::ENUM(info->name, result, total_row_count);
85
+ info->type = LogicalType::ENUM(info->name, g_sink_state.result, g_sink_state.size);
109
86
  }
110
87
 
111
88
  auto &catalog = Catalog::GetCatalog(context.client, info->catalog);
112
- catalog.CreateType(context.client, info.get());
89
+ auto catalog_entry = catalog.CreateType(context.client, info.get());
90
+ D_ASSERT(catalog_entry->type == CatalogType::TYPE_ENTRY);
91
+ auto catalog_type = (TypeCatalogEntry *)catalog_entry;
92
+ LogicalType::SetCatalog(info->type, catalog_type);
113
93
  state.finished = true;
114
94
  }
115
95
 
@@ -9,6 +9,7 @@
9
9
  #include "duckdb/parser/expression/comparison_expression.hpp"
10
10
  #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
11
11
  #include "duckdb/planner/operator/logical_aggregate.hpp"
12
+ #include "duckdb/function/function_binder.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
 
@@ -169,13 +170,20 @@ PhysicalPlanGenerator::ExtractAggregateExpressions(unique_ptr<PhysicalOperator>
169
170
  vector<unique_ptr<Expression>> expressions;
170
171
  vector<LogicalType> types;
171
172
 
173
+ // bind sorted aggregates
174
+ for (auto &aggr : aggregates) {
175
+ auto &bound_aggr = (BoundAggregateExpression &)*aggr;
176
+ if (bound_aggr.order_bys) {
177
+ // sorted aggregate!
178
+ FunctionBinder::BindSortedAggregate(context, bound_aggr, groups);
179
+ }
180
+ }
172
181
  for (auto &group : groups) {
173
182
  auto ref = make_unique<BoundReferenceExpression>(group->return_type, expressions.size());
174
183
  types.push_back(group->return_type);
175
184
  expressions.push_back(std::move(group));
176
185
  group = std::move(ref);
177
186
  }
178
-
179
187
  for (auto &aggr : aggregates) {
180
188
  auto &bound_aggr = (BoundAggregateExpression &)*aggr;
181
189
  for (auto &child : bound_aggr.children) {
@@ -9,8 +9,10 @@
9
9
 
10
10
  namespace duckdb {
11
11
 
12
- unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreateDistinctOn(unique_ptr<PhysicalOperator> child,
13
- vector<unique_ptr<Expression>> distinct_targets) {
12
+ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {
13
+ D_ASSERT(op.children.size() == 1);
14
+ auto child = CreatePlan(*op.children[0]);
15
+ auto &distinct_targets = op.distinct_targets;
14
16
  D_ASSERT(child);
15
17
  D_ASSERT(!distinct_targets.empty());
16
18
 
@@ -55,6 +57,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreateDistinctOn(unique_ptr<
55
57
  FunctionBinder function_binder(context);
56
58
  auto first_aggregate = function_binder.BindAggregateFunction(
57
59
  FirstFun::GetFunction(logical_type), std::move(first_children), nullptr, AggregateType::NON_DISTINCT);
60
+ first_aggregate->order_bys = op.order_by ? op.order_by->Copy() : nullptr;
58
61
  // add the projection
59
62
  projections.push_back(make_unique<BoundReferenceExpression>(logical_type, group_count + aggregates.size()));
60
63
  // push it to the list of aggregates
@@ -81,10 +84,4 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreateDistinctOn(unique_ptr<
81
84
  return std::move(aggr_projection);
82
85
  }
83
86
 
84
- unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalDistinct &op) {
85
- D_ASSERT(op.children.size() == 1);
86
- auto plan = CreatePlan(*op.children[0]);
87
- return CreateDistinctOn(std::move(plan), std::move(op.distinct_targets));
88
- }
89
-
90
87
  } // namespace duckdb
@@ -95,6 +95,7 @@ AggregateFunction BoolOrFun::GetFunction() {
95
95
  auto fun = AggregateFunction::UnaryAggregate<BoolState, bool, bool, BoolOrFunFunction>(
96
96
  LogicalType(LogicalTypeId::BOOLEAN), LogicalType::BOOLEAN);
97
97
  fun.name = "bool_or";
98
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
98
99
  return fun;
99
100
  }
100
101
 
@@ -102,6 +103,7 @@ AggregateFunction BoolAndFun::GetFunction() {
102
103
  auto fun = AggregateFunction::UnaryAggregate<BoolState, bool, bool, BoolAndFunFunction>(
103
104
  LogicalType(LogicalTypeId::BOOLEAN), LogicalType::BOOLEAN);
104
105
  fun.name = "bool_and";
106
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
105
107
  return fun;
106
108
  }
107
109
 
@@ -76,6 +76,7 @@ AggregateFunction CountFun::GetFunction() {
76
76
  LogicalType(LogicalTypeId::ANY), LogicalType::BIGINT);
77
77
  fun.name = "count";
78
78
  fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
79
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
79
80
  return fun;
80
81
  }
81
82
 
@@ -514,6 +514,7 @@ unique_ptr<FunctionData> BindDecimalMinMax(ClientContext &context, AggregateFunc
514
514
  function.name = std::move(name);
515
515
  function.arguments[0] = decimal_type;
516
516
  function.return_type = decimal_type;
517
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
517
518
  return nullptr;
518
519
  }
519
520
 
@@ -545,6 +546,7 @@ unique_ptr<FunctionData> BindMinMax(ClientContext &context, AggregateFunction &f
545
546
  auto name = std::move(function.name);
546
547
  function = GetMinMaxOperator<OP, OP_STRING, OP_VECTOR>(input_type);
547
548
  function.name = std::move(name);
549
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
548
550
  if (function.bind) {
549
551
  return function.bind(context, function, arguments);
550
552
  } else {
@@ -110,6 +110,7 @@ AggregateFunction SumFun::GetSumAggregate(PhysicalType type) {
110
110
  case PhysicalType::INT16: {
111
111
  auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int16_t, hugeint_t, IntegerSumOperation>(
112
112
  LogicalType::SMALLINT, LogicalType::HUGEINT);
113
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
113
114
  return function;
114
115
  }
115
116
 
@@ -118,6 +119,7 @@ AggregateFunction SumFun::GetSumAggregate(PhysicalType type) {
118
119
  AggregateFunction::UnaryAggregate<SumState<hugeint_t>, int32_t, hugeint_t, SumToHugeintOperation>(
119
120
  LogicalType::INTEGER, LogicalType::HUGEINT);
120
121
  function.statistics = SumPropagateStats;
122
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
121
123
  return function;
122
124
  }
123
125
  case PhysicalType::INT64: {
@@ -125,12 +127,14 @@ AggregateFunction SumFun::GetSumAggregate(PhysicalType type) {
125
127
  AggregateFunction::UnaryAggregate<SumState<hugeint_t>, int64_t, hugeint_t, SumToHugeintOperation>(
126
128
  LogicalType::BIGINT, LogicalType::HUGEINT);
127
129
  function.statistics = SumPropagateStats;
130
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
128
131
  return function;
129
132
  }
130
133
  case PhysicalType::INT128: {
131
134
  auto function =
132
135
  AggregateFunction::UnaryAggregate<SumState<hugeint_t>, hugeint_t, hugeint_t, HugeintSumOperation>(
133
136
  LogicalType::HUGEINT, LogicalType::HUGEINT);
137
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
134
138
  return function;
135
139
  }
136
140
  default:
@@ -144,12 +148,14 @@ AggregateFunction SumFun::GetSumAggregateNoOverflow(PhysicalType type) {
144
148
  auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int32_t, hugeint_t, IntegerSumOperation>(
145
149
  LogicalType::INTEGER, LogicalType::HUGEINT);
146
150
  function.name = "sum_no_overflow";
151
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
147
152
  return function;
148
153
  }
149
154
  case PhysicalType::INT64: {
150
155
  auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int64_t, hugeint_t, IntegerSumOperation>(
151
156
  LogicalType::BIGINT, LogicalType::HUGEINT);
152
157
  function.name = "sum_no_overflow";
158
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
153
159
  return function;
154
160
  }
155
161
  default:
@@ -164,6 +170,7 @@ unique_ptr<FunctionData> BindDecimalSum(ClientContext &context, AggregateFunctio
164
170
  function.name = "sum";
165
171
  function.arguments[0] = decimal_type;
166
172
  function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type));
173
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
167
174
  return nullptr;
168
175
  }
169
176
 
@@ -174,6 +181,7 @@ unique_ptr<FunctionData> BindDecimalSumNoOverflow(ClientContext &context, Aggreg
174
181
  function.name = "sum_no_overflow";
175
182
  function.arguments[0] = decimal_type;
176
183
  function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type));
184
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
177
185
  return nullptr;
178
186
  }
179
187