duckdb 0.7.2-dev457.0 → 0.7.2-dev586.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/binding.gyp +9 -9
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/parquet-extension.cpp +1 -0
  4. package/src/duckdb/src/catalog/catalog.cpp +13 -0
  5. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -21
  6. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +8 -2
  7. package/src/duckdb/src/catalog/catalog_set.cpp +1 -0
  8. package/src/duckdb/src/common/arrow/arrow_appender.cpp +48 -4
  9. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  10. package/src/duckdb/src/common/field_writer.cpp +1 -0
  11. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +4 -0
  12. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +15 -2
  13. package/src/duckdb/src/common/types.cpp +136 -53
  14. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +20 -40
  15. package/src/duckdb/src/function/table/arrow.cpp +3 -0
  16. package/src/duckdb/src/function/table/arrow_conversion.cpp +18 -0
  17. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  18. package/src/duckdb/src/function/table_function.cpp +11 -11
  19. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +3 -0
  20. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
  21. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +12 -4
  22. package/src/duckdb/src/include/duckdb/common/{http_stats.hpp → http_state.hpp} +18 -4
  23. package/src/duckdb/src/include/duckdb/common/serializer/buffered_deserializer.hpp +4 -2
  24. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +8 -2
  25. package/src/duckdb/src/include/duckdb/common/serializer.hpp +13 -0
  26. package/src/duckdb/src/include/duckdb/common/types.hpp +27 -1
  27. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +1 -0
  28. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +12 -1
  29. package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
  30. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -3
  31. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -0
  32. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  33. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +3 -0
  34. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +6 -0
  35. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +1 -1
  36. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +7 -0
  37. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +1 -1
  38. package/src/duckdb/src/main/client_context.cpp +30 -32
  39. package/src/duckdb/src/main/client_data.cpp +7 -6
  40. package/src/duckdb/src/main/database.cpp +9 -0
  41. package/src/duckdb/src/main/query_profiler.cpp +17 -15
  42. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +9 -2
  43. package/src/duckdb/src/parser/transform/statement/transform_rename.cpp +3 -4
  44. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +11 -1
  45. package/src/duckdb/src/planner/logical_operator.cpp +4 -2
  46. package/src/duckdb/src/planner/planner.cpp +2 -1
  47. package/src/duckdb/src/storage/checkpoint_manager.cpp +8 -3
  48. package/src/duckdb/src/storage/meta_block_reader.cpp +22 -0
  49. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  50. package/src/duckdb/src/storage/wal_replay.cpp +8 -5
  51. package/src/duckdb/src/storage/write_ahead_log.cpp +2 -2
  52. package/src/duckdb/src/transaction/commit_state.cpp +11 -7
  53. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1152 -1152
  54. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
package/binding.gyp CHANGED
@@ -223,18 +223,18 @@
223
223
  "src/duckdb/third_party/zstd/compress/zstd_lazy.cpp",
224
224
  "src/duckdb/third_party/zstd/compress/zstd_ldm.cpp",
225
225
  "src/duckdb/third_party/zstd/compress/zstd_opt.cpp",
226
- "src/duckdb/extension/icu/./icu-dateadd.cpp",
227
- "src/duckdb/extension/icu/./icu-table-range.cpp",
228
- "src/duckdb/extension/icu/./icu-datetrunc.cpp",
229
- "src/duckdb/extension/icu/./icu-strptime.cpp",
230
- "src/duckdb/extension/icu/./icu-datefunc.cpp",
231
- "src/duckdb/extension/icu/./icu-extension.cpp",
232
- "src/duckdb/extension/icu/./icu-makedate.cpp",
233
226
  "src/duckdb/extension/icu/./icu-list-range.cpp",
227
+ "src/duckdb/extension/icu/./icu-datepart.cpp",
228
+ "src/duckdb/extension/icu/./icu-timebucket.cpp",
234
229
  "src/duckdb/extension/icu/./icu-timezone.cpp",
230
+ "src/duckdb/extension/icu/./icu-datefunc.cpp",
231
+ "src/duckdb/extension/icu/./icu-makedate.cpp",
232
+ "src/duckdb/extension/icu/./icu-datetrunc.cpp",
233
+ "src/duckdb/extension/icu/./icu-table-range.cpp",
234
+ "src/duckdb/extension/icu/./icu-extension.cpp",
235
+ "src/duckdb/extension/icu/./icu-dateadd.cpp",
235
236
  "src/duckdb/extension/icu/./icu-datesub.cpp",
236
- "src/duckdb/extension/icu/./icu-timebucket.cpp",
237
- "src/duckdb/extension/icu/./icu-datepart.cpp",
237
+ "src/duckdb/extension/icu/./icu-strptime.cpp",
238
238
  "src/duckdb/ub_extension_icu_third_party_icu_common.cpp",
239
239
  "src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
240
240
  "src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev457.0",
5
+ "version": "0.7.2-dev586.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -354,6 +354,7 @@ public:
354
354
  return_types.assign(union_col_types.begin(), union_col_types.end());
355
355
  result->SetInitialReader(result->union_readers[0]);
356
356
  D_ASSERT(names.size() == return_types.size());
357
+ result->types = union_col_types;
357
358
 
358
359
  return std::move(result);
359
360
  }
@@ -625,6 +625,19 @@ vector<SchemaCatalogEntry *> Catalog::GetSchemas(ClientContext &context) {
625
625
  return schemas;
626
626
  }
627
627
 
628
+ bool Catalog::TypeExists(ClientContext &context, const string &catalog_name, const string &schema, const string &name) {
629
+ CatalogEntry *entry;
630
+ entry = GetEntry(context, CatalogType::TYPE_ENTRY, catalog_name, schema, name, true);
631
+ if (!entry) {
632
+ // look in the system catalog
633
+ entry = GetEntry(context, CatalogType::TYPE_ENTRY, SYSTEM_CATALOG, schema, name, true);
634
+ if (!entry) {
635
+ return false;
636
+ }
637
+ }
638
+ return true;
639
+ }
640
+
628
641
  vector<SchemaCatalogEntry *> Catalog::GetSchemas(ClientContext &context, const string &catalog_name) {
629
642
  vector<Catalog *> catalogs;
630
643
  if (IsInvalidCatalog(catalog_name)) {
@@ -682,27 +682,8 @@ void DuckTableEntry::SetAsRoot() {
682
682
  storage->info->table = name;
683
683
  }
684
684
 
685
- void DuckTableEntry::CommitAlter(AlterInfo &info) {
686
- D_ASSERT(info.type == AlterType::ALTER_TABLE);
687
- auto &alter_table = (AlterTableInfo &)info;
688
- string column_name;
689
- switch (alter_table.alter_table_type) {
690
- case AlterTableType::REMOVE_COLUMN: {
691
- auto &remove_info = (RemoveColumnInfo &)alter_table;
692
- column_name = remove_info.removed_column;
693
- break;
694
- }
695
- case AlterTableType::ALTER_COLUMN_TYPE: {
696
- auto &change_info = (ChangeColumnTypeInfo &)alter_table;
697
- column_name = change_info.column_name;
698
- break;
699
- }
700
- default:
701
- break;
702
- }
703
- if (column_name.empty()) {
704
- return;
705
- }
685
+ void DuckTableEntry::CommitAlter(string &column_name) {
686
+ D_ASSERT(!column_name.empty());
706
687
  idx_t removed_index = DConstants::INVALID_INDEX;
707
688
  for (auto &col : columns.Logical()) {
708
689
  if (col.Name() == column_name) {
@@ -23,7 +23,13 @@ void TypeCatalogEntry::Serialize(Serializer &serializer) {
23
23
  FieldWriter writer(serializer);
24
24
  writer.WriteString(schema->name);
25
25
  writer.WriteString(name);
26
- writer.WriteSerializable(user_type);
26
+ if (user_type.id() == LogicalTypeId::ENUM) {
27
+ // We have to serialize Enum Values
28
+ writer.AddField();
29
+ user_type.SerializeEnumType(writer.GetSerializer());
30
+ } else {
31
+ writer.WriteSerializable(user_type);
32
+ }
27
33
  writer.Finalize();
28
34
  }
29
35
 
@@ -43,7 +49,7 @@ string TypeCatalogEntry::ToSQL() {
43
49
  std::stringstream ss;
44
50
  switch (user_type.id()) {
45
51
  case (LogicalTypeId::ENUM): {
46
- Vector values_insert_order(EnumType::GetValuesInsertOrder(user_type));
52
+ auto &values_insert_order = EnumType::GetValuesInsertOrder(user_type);
47
53
  idx_t size = EnumType::GetSize(user_type);
48
54
  ss << "CREATE TYPE ";
49
55
  ss << KeywordHelper::WriteOptionallyQuoted(name);
@@ -258,6 +258,7 @@ bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name,
258
258
 
259
259
  // serialize the AlterInfo into a temporary buffer
260
260
  BufferedSerializer serializer;
261
+ serializer.WriteString(alter_info->GetColumnName());
261
262
  alter_info->Serialize(serializer);
262
263
  BinaryData serialized_alter = serializer.GetData();
263
264
 
@@ -4,6 +4,7 @@
4
4
  #include "duckdb/common/array.hpp"
5
5
  #include "duckdb/common/types/interval.hpp"
6
6
  #include "duckdb/common/types/uuid.hpp"
7
+ #include "duckdb/function/table/arrow.hpp"
7
8
 
8
9
  namespace duckdb {
9
10
 
@@ -127,7 +128,11 @@ struct ArrowScalarConverter {
127
128
  struct ArrowIntervalConverter {
128
129
  template <class TGT, class SRC>
129
130
  static TGT Operation(SRC input) {
130
- return Interval::GetMilli(input);
131
+ ArrowInterval result;
132
+ result.months = input.months;
133
+ result.days = input.days;
134
+ result.nanoseconds = input.micros * Interval::NANOS_PER_MICRO;
135
+ return result;
131
136
  }
132
137
 
133
138
  static bool SkipNulls() {
@@ -136,7 +141,6 @@ struct ArrowIntervalConverter {
136
141
 
137
142
  template <class TGT>
138
143
  static void SetNull(TGT &value) {
139
- value = 0;
140
144
  }
141
145
  };
142
146
 
@@ -185,11 +189,51 @@ struct ArrowScalarData : public ArrowScalarBaseData<TGT, SRC, OP> {
185
189
  //===--------------------------------------------------------------------===//
186
190
  template <class TGT>
187
191
  struct ArrowEnumData : public ArrowScalarBaseData<TGT> {
192
+ static idx_t GetLength(string_t input) {
193
+ return input.GetSize();
194
+ }
195
+ static void WriteData(data_ptr_t target, string_t input) {
196
+ memcpy(target, input.GetDataUnsafe(), input.GetSize());
197
+ }
198
+ static void EnumAppendVector(ArrowAppendData &append_data, const Vector &input, idx_t size) {
199
+ D_ASSERT(input.GetVectorType() == VectorType::FLAT_VECTOR);
200
+
201
+ // resize the validity mask and set up the validity buffer for iteration
202
+ ResizeValidity(append_data.validity, append_data.row_count + size);
203
+
204
+ // resize the offset buffer - the offset buffer holds the offsets into the child array
205
+ append_data.main_buffer.resize(append_data.main_buffer.size() + sizeof(uint32_t) * (size + 1));
206
+ auto data = (string_t *)FlatVector::GetData<string_t>(input);
207
+ auto offset_data = (uint32_t *)append_data.main_buffer.data();
208
+ if (append_data.row_count == 0) {
209
+ // first entry
210
+ offset_data[0] = 0;
211
+ }
212
+ // now append the string data to the auxiliary buffer
213
+ // the auxiliary buffer's length depends on the string lengths, so we resize as required
214
+ auto last_offset = offset_data[append_data.row_count];
215
+ for (idx_t i = 0; i < size; i++) {
216
+ auto offset_idx = append_data.row_count + i + 1;
217
+
218
+ auto string_length = GetLength(data[i]);
219
+
220
+ // append the offset data
221
+ auto current_offset = last_offset + string_length;
222
+ offset_data[offset_idx] = current_offset;
223
+
224
+ // resize the string buffer if required, and write the string data
225
+ append_data.aux_buffer.resize(current_offset);
226
+ WriteData(append_data.aux_buffer.data() + last_offset, data[i]);
227
+
228
+ last_offset = current_offset;
229
+ }
230
+ append_data.row_count += size;
231
+ }
188
232
  static void Initialize(ArrowAppendData &result, const LogicalType &type, idx_t capacity) {
189
233
  result.main_buffer.reserve(capacity * sizeof(TGT));
190
234
  // construct the enum child data
191
235
  auto enum_data = InitializeArrowChild(LogicalType::VARCHAR, EnumType::GetSize(type));
192
- enum_data->append_vector(*enum_data, EnumType::GetValuesInsertOrder(type), EnumType::GetSize(type));
236
+ EnumAppendVector(*enum_data, EnumType::GetValuesInsertOrder(type), EnumType::GetSize(type));
193
237
  result.child_data.push_back(std::move(enum_data));
194
238
  }
195
239
 
@@ -629,7 +673,7 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
629
673
  }
630
674
  break;
631
675
  case LogicalTypeId::INTERVAL:
632
- InitializeFunctionPointers<ArrowScalarData<int64_t, interval_t, ArrowIntervalConverter>>(append_data);
676
+ InitializeFunctionPointers<ArrowScalarData<ArrowInterval, interval_t, ArrowIntervalConverter>>(append_data);
633
677
  break;
634
678
  case LogicalTypeId::STRUCT:
635
679
  InitializeFunctionPointers<ArrowStructData>(append_data);
@@ -150,7 +150,7 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
150
150
  child.format = "tsm:";
151
151
  break;
152
152
  case LogicalTypeId::INTERVAL:
153
- child.format = "tDm";
153
+ child.format = "tin";
154
154
  break;
155
155
  case LogicalTypeId::DECIMAL: {
156
156
  uint8_t width, scale;
@@ -8,6 +8,7 @@ namespace duckdb {
8
8
  FieldWriter::FieldWriter(Serializer &serializer_p)
9
9
  : serializer(serializer_p), buffer(make_unique<BufferedSerializer>()), field_count(0), finalized(false) {
10
10
  buffer->SetVersion(serializer.GetVersion());
11
+ buffer->is_query_plan = serializer.is_query_plan;
11
12
  }
12
13
 
13
14
  FieldWriter::~FieldWriter() {
@@ -20,4 +20,8 @@ void BufferedDeserializer::ReadData(data_ptr_t buffer, idx_t read_size) {
20
20
  ptr += read_size;
21
21
  }
22
22
 
23
+ ClientContext &BufferedContextDeserializer::GetContext() {
24
+ return context;
25
+ }
26
+
23
27
  } // namespace duckdb
@@ -7,8 +7,10 @@
7
7
 
8
8
  namespace duckdb {
9
9
 
10
- BufferedFileReader::BufferedFileReader(FileSystem &fs, const char *path, FileLockType lock_type, FileOpener *opener)
11
- : fs(fs), data(unique_ptr<data_t[]>(new data_t[FILE_BUFFER_SIZE])), offset(0), read_data(0), total_read(0) {
10
+ BufferedFileReader::BufferedFileReader(FileSystem &fs, const char *path, ClientContext *context, FileLockType lock_type,
11
+ FileOpener *opener)
12
+ : fs(fs), data(unique_ptr<data_t[]>(new data_t[FILE_BUFFER_SIZE])), offset(0), read_data(0), context(context),
13
+ total_read(0) {
12
14
  handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ, lock_type, FileSystem::DEFAULT_COMPRESSION, opener);
13
15
  file_size = fs.GetFileSize(*handle);
14
16
  }
@@ -54,4 +56,15 @@ uint64_t BufferedFileReader::CurrentOffset() {
54
56
  return total_read + offset;
55
57
  }
56
58
 
59
+ ClientContext &BufferedFileReader::GetContext() {
60
+ if (!context) {
61
+ throw InternalException("Trying to acquire a client context that does not exist");
62
+ }
63
+ return *context;
64
+ }
65
+
66
+ Catalog *BufferedFileReader::GetCatalog() {
67
+ return catalog;
68
+ }
69
+
57
70
  } // namespace duckdb
@@ -1,7 +1,9 @@
1
1
  #include "duckdb/common/types.hpp"
2
2
 
3
3
  #include "duckdb/catalog/catalog.hpp"
4
+ #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
4
5
  #include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp"
6
+ #include "duckdb/catalog/catalog_search_path.hpp"
5
7
  #include "duckdb/catalog/default/default_types.hpp"
6
8
  #include "duckdb/common/exception.hpp"
7
9
  #include "duckdb/common/field_writer.hpp"
@@ -17,6 +19,11 @@
17
19
  #include "duckdb/common/types/vector.hpp"
18
20
  #include "duckdb/common/unordered_map.hpp"
19
21
  #include "duckdb/function/cast_rules.hpp"
22
+ #include "duckdb/main/attached_database.hpp"
23
+ #include "duckdb/main/client_context.hpp"
24
+ #include "duckdb/main/client_data.hpp"
25
+ #include "duckdb/main/database.hpp"
26
+ #include "duckdb/main/database_manager.hpp"
20
27
  #include "duckdb/parser/keyword_helper.hpp"
21
28
  #include "duckdb/parser/parser.hpp"
22
29
 
@@ -785,7 +792,7 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
785
792
  child_list_t<LogicalType> child_types;
786
793
  for (idx_t i = 0; i < left_child_types.size(); i++) {
787
794
  auto child_type = MaxLogicalType(left_child_types[i].second, right_child_types[i].second);
788
- child_types.push_back(make_pair(left_child_types[i].first, std::move(child_type)));
795
+ child_types.emplace_back(left_child_types[i].first, std::move(child_type));
789
796
  }
790
797
 
791
798
  return LogicalType::STRUCT(std::move(child_types));
@@ -797,7 +804,7 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
797
804
  // return the "larger" type, with the most members
798
805
  return left_member_count > right_member_count ? left : right;
799
806
  }
800
- // otherwise, keep left, dont try to meld the two together.
807
+ // otherwise, keep left, don't try to meld the two together.
801
808
  return left;
802
809
  }
803
810
  // types are equal but no extra specifier: just return the type
@@ -838,17 +845,6 @@ bool ApproxEqual(double ldecimal, double rdecimal) {
838
845
  //===--------------------------------------------------------------------===//
839
846
  // Extra Type Info
840
847
  //===--------------------------------------------------------------------===//
841
- enum class ExtraTypeInfoType : uint8_t {
842
- INVALID_TYPE_INFO = 0,
843
- GENERIC_TYPE_INFO = 1,
844
- DECIMAL_TYPE_INFO = 2,
845
- STRING_TYPE_INFO = 3,
846
- LIST_TYPE_INFO = 4,
847
- STRUCT_TYPE_INFO = 5,
848
- ENUM_TYPE_INFO = 6,
849
- USER_TYPE_INFO = 7,
850
- AGGREGATE_STATE_TYPE_INFO = 8
851
- };
852
848
 
853
849
  struct ExtraTypeInfo {
854
850
  explicit ExtraTypeInfo(ExtraTypeInfoType type) : type(type) {
@@ -941,6 +937,10 @@ TypeCatalogEntry *LogicalType::GetCatalog(const LogicalType &type) {
941
937
  return ((ExtraTypeInfo &)*info).catalog_entry;
942
938
  }
943
939
 
940
+ ExtraTypeInfoType LogicalType::GetExtraTypeInfoType(const ExtraTypeInfo &type) {
941
+ return type.type;
942
+ }
943
+
944
944
  //===--------------------------------------------------------------------===//
945
945
  // Decimal Type
946
946
  //===--------------------------------------------------------------------===//
@@ -1108,7 +1108,7 @@ public:
1108
1108
  for (uint32_t i = 0; i < child_types_size; i++) {
1109
1109
  auto name = source.Read<string>();
1110
1110
  auto type = LogicalType::Deserialize(source);
1111
- child_list.push_back(make_pair(std::move(name), std::move(type)));
1111
+ child_list.emplace_back(std::move(name), std::move(type));
1112
1112
  }
1113
1113
  return make_shared<StructTypeInfo>(std::move(child_list));
1114
1114
  }
@@ -1227,8 +1227,8 @@ LogicalType LogicalType::MAP(LogicalType child) {
1227
1227
 
1228
1228
  LogicalType LogicalType::MAP(LogicalType key, LogicalType value) {
1229
1229
  child_list_t<LogicalType> child_types;
1230
- child_types.push_back({"key", std::move(key)});
1231
- child_types.push_back({"value", std::move(value)});
1230
+ child_types.emplace_back("key", std::move(key));
1231
+ child_types.emplace_back("value", std::move(value));
1232
1232
  return LogicalType::MAP(LogicalType::STRUCT(std::move(child_types)));
1233
1233
  }
1234
1234
 
@@ -1247,7 +1247,7 @@ const LogicalType &MapType::ValueType(const LogicalType &type) {
1247
1247
  //===--------------------------------------------------------------------===//
1248
1248
 
1249
1249
  LogicalType LogicalType::UNION(child_list_t<LogicalType> members) {
1250
- D_ASSERT(members.size() > 0);
1250
+ D_ASSERT(!members.empty());
1251
1251
  D_ASSERT(members.size() <= UnionType::MAX_UNION_MEMBERS);
1252
1252
  // union types always have a hidden "tag" field in front
1253
1253
  members.insert(members.begin(), {"", LogicalType::TINYINT});
@@ -1270,7 +1270,7 @@ const string &UnionType::GetMemberName(const LogicalType &type, idx_t index) {
1270
1270
  }
1271
1271
 
1272
1272
  idx_t UnionType::GetMemberCount(const LogicalType &type) {
1273
- // dont count the "tag" field
1273
+ // don't count the "tag" field
1274
1274
  return StructType::GetChildTypes(type).size() - 1;
1275
1275
  }
1276
1276
  const child_list_t<LogicalType> UnionType::CopyMemberTypes(const LogicalType &type) {
@@ -1326,13 +1326,27 @@ enum EnumDictType : uint8_t { INVALID = 0, VECTOR_DICT = 1 };
1326
1326
 
1327
1327
  struct EnumTypeInfo : public ExtraTypeInfo {
1328
1328
  explicit EnumTypeInfo(string enum_name_p, Vector &values_insert_order_p, idx_t dict_size_p)
1329
- : ExtraTypeInfo(ExtraTypeInfoType::ENUM_TYPE_INFO), dict_type(EnumDictType::VECTOR_DICT),
1330
- enum_name(std::move(enum_name_p)), values_insert_order(values_insert_order_p), dict_size(dict_size_p) {
1331
- }
1332
- EnumDictType dict_type;
1333
- string enum_name;
1334
- Vector values_insert_order;
1335
- idx_t dict_size;
1329
+ : ExtraTypeInfo(ExtraTypeInfoType::ENUM_TYPE_INFO), values_insert_order(values_insert_order_p),
1330
+ dict_type(EnumDictType::VECTOR_DICT), enum_name(std::move(enum_name_p)), dict_size(dict_size_p) {
1331
+ }
1332
+
1333
+ const EnumDictType &GetEnumDictType() {
1334
+ return dict_type;
1335
+ };
1336
+ const string &GetEnumName() {
1337
+ return enum_name;
1338
+ };
1339
+ const string GetSchemaName() const {
1340
+ return catalog_entry ? catalog_entry->schema->name : "";
1341
+ };
1342
+ const Vector &GetValuesInsertOrder() {
1343
+ return values_insert_order;
1344
+ };
1345
+ const idx_t &GetDictSize() {
1346
+ return dict_size;
1347
+ };
1348
+ EnumTypeInfo(const EnumTypeInfo &) = delete;
1349
+ EnumTypeInfo &operator=(const EnumTypeInfo &) = delete;
1336
1350
 
1337
1351
  protected:
1338
1352
  // Equalities are only used in enums with different catalog entries
@@ -1362,12 +1376,39 @@ protected:
1362
1376
  if (dict_type != EnumDictType::VECTOR_DICT) {
1363
1377
  throw InternalException("Cannot serialize non-vector dictionary ENUM types");
1364
1378
  }
1365
- writer.WriteField<uint32_t>(dict_size);
1366
- writer.WriteString(enum_name);
1367
- ((Vector &)values_insert_order).Serialize(dict_size, writer.GetSerializer());
1379
+ bool serialize_internals = GetSchemaName().empty() || writer.GetSerializer().is_query_plan;
1380
+ EnumType::Serialize(writer, *this, serialize_internals);
1368
1381
  }
1382
+
1383
+ Vector values_insert_order;
1384
+
1385
+ private:
1386
+ EnumDictType dict_type;
1387
+ string enum_name;
1388
+ idx_t dict_size;
1369
1389
  };
1370
1390
 
1391
+ // If this type is primarily stored in the catalog or not. Enums from Pandas/Factors are not in the catalog.
1392
+
1393
+ void EnumType::Serialize(FieldWriter &writer, const ExtraTypeInfo &type_info, bool serialize_internals) {
1394
+ D_ASSERT(type_info.type == ExtraTypeInfoType::ENUM_TYPE_INFO);
1395
+ auto &enum_info = (EnumTypeInfo &)type_info;
1396
+ // Store Schema Name
1397
+ writer.WriteString(enum_info.GetSchemaName());
1398
+ // Store Enum Name
1399
+ writer.WriteString(enum_info.GetEnumName());
1400
+ // Store If we are serializing the internals
1401
+ writer.WriteField<bool>(serialize_internals);
1402
+ if (serialize_internals) {
1403
+ // We must serialize the internals
1404
+ auto dict_size = enum_info.GetDictSize();
1405
+ // Store Dictionary Size
1406
+ writer.WriteField<uint32_t>(dict_size);
1407
+ // Store Vector Order By Insertion
1408
+ ((Vector &)enum_info.GetValuesInsertOrder()).Serialize(dict_size, writer.GetSerializer());
1409
+ }
1410
+ }
1411
+
1371
1412
  template <class T>
1372
1413
  struct EnumTypeInfoTemplated : public EnumTypeInfo {
1373
1414
  explicit EnumTypeInfoTemplated(const string &enum_name_p, Vector &values_insert_order_p, idx_t size_p)
@@ -1391,13 +1432,21 @@ struct EnumTypeInfoTemplated : public EnumTypeInfo {
1391
1432
  }
1392
1433
  }
1393
1434
 
1394
- static shared_ptr<EnumTypeInfoTemplated> Deserialize(FieldReader &reader, uint32_t size) {
1395
- auto enum_name = reader.ReadRequired<string>();
1435
+ static shared_ptr<EnumTypeInfoTemplated> Deserialize(FieldReader &reader, uint32_t size, string enum_name) {
1436
+
1396
1437
  Vector values_insert_order(LogicalType::VARCHAR, size);
1397
1438
  values_insert_order.Deserialize(size, reader.GetSource());
1398
1439
  return make_shared<EnumTypeInfoTemplated>(std::move(enum_name), values_insert_order, size);
1399
1440
  }
1400
1441
 
1442
+ string_map_t<T> &GetValues() {
1443
+ return values;
1444
+ }
1445
+
1446
+ EnumTypeInfoTemplated(const EnumTypeInfoTemplated &) = delete;
1447
+ EnumTypeInfoTemplated &operator=(const EnumTypeInfoTemplated &) = delete;
1448
+
1449
+ private:
1401
1450
  string_map_t<T> values;
1402
1451
  };
1403
1452
 
@@ -1405,7 +1454,7 @@ const string &EnumType::GetTypeName(const LogicalType &type) {
1405
1454
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1406
1455
  auto info = type.AuxInfo();
1407
1456
  D_ASSERT(info);
1408
- return ((EnumTypeInfo &)*info).enum_name;
1457
+ return ((EnumTypeInfo &)*info).GetEnumName();
1409
1458
  }
1410
1459
 
1411
1460
  static PhysicalType EnumVectorDictType(idx_t size) {
@@ -1454,11 +1503,11 @@ int64_t EnumType::GetPos(const LogicalType &type, const string_t &key) {
1454
1503
  auto info = type.AuxInfo();
1455
1504
  switch (type.InternalType()) {
1456
1505
  case PhysicalType::UINT8:
1457
- return TemplatedGetPos(((EnumTypeInfoTemplated<uint8_t> &)*info).values, key);
1506
+ return TemplatedGetPos(((EnumTypeInfoTemplated<uint8_t> &)*info).GetValues(), key);
1458
1507
  case PhysicalType::UINT16:
1459
- return TemplatedGetPos(((EnumTypeInfoTemplated<uint16_t> &)*info).values, key);
1508
+ return TemplatedGetPos(((EnumTypeInfoTemplated<uint16_t> &)*info).GetValues(), key);
1460
1509
  case PhysicalType::UINT32:
1461
- return TemplatedGetPos(((EnumTypeInfoTemplated<uint32_t> &)*info).values, key);
1510
+ return TemplatedGetPos(((EnumTypeInfoTemplated<uint32_t> &)*info).GetValues(), key);
1462
1511
  default:
1463
1512
  throw InternalException("ENUM can only have unsigned integers (except UINT64) as physical types");
1464
1513
  }
@@ -1466,22 +1515,22 @@ int64_t EnumType::GetPos(const LogicalType &type, const string_t &key) {
1466
1515
 
1467
1516
  const string EnumType::GetValue(const Value &val) {
1468
1517
  auto info = val.type().AuxInfo();
1469
- auto &values_insert_order = ((EnumTypeInfo &)*info).values_insert_order;
1518
+ auto &values_insert_order = ((EnumTypeInfo &)*info).GetValuesInsertOrder();
1470
1519
  return StringValue::Get(values_insert_order.GetValue(val.GetValue<uint32_t>()));
1471
1520
  }
1472
1521
 
1473
- Vector &EnumType::GetValuesInsertOrder(const LogicalType &type) {
1522
+ const Vector &EnumType::GetValuesInsertOrder(const LogicalType &type) {
1474
1523
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1475
1524
  auto info = type.AuxInfo();
1476
1525
  D_ASSERT(info);
1477
- return ((EnumTypeInfo &)*info).values_insert_order;
1526
+ return ((EnumTypeInfo &)*info).GetValuesInsertOrder();
1478
1527
  }
1479
1528
 
1480
1529
  idx_t EnumType::GetSize(const LogicalType &type) {
1481
1530
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1482
1531
  auto info = type.AuxInfo();
1483
1532
  D_ASSERT(info);
1484
- return ((EnumTypeInfo &)*info).dict_size;
1533
+ return ((EnumTypeInfo &)*info).GetDictSize();
1485
1534
  }
1486
1535
 
1487
1536
  void EnumType::SetCatalog(LogicalType &type, TypeCatalogEntry *catalog_entry) {
@@ -1497,13 +1546,18 @@ TypeCatalogEntry *EnumType::GetCatalog(const LogicalType &type) {
1497
1546
  return ((EnumTypeInfo &)*info).catalog_entry;
1498
1547
  }
1499
1548
 
1549
+ string EnumType::GetSchemaName(const LogicalType &type) {
1550
+ auto catalog_entry = EnumType::GetCatalog(type);
1551
+ return catalog_entry ? catalog_entry->schema->name : "";
1552
+ }
1553
+
1500
1554
  PhysicalType EnumType::GetPhysicalType(const LogicalType &type) {
1501
1555
  D_ASSERT(type.id() == LogicalTypeId::ENUM);
1502
1556
  auto aux_info = type.AuxInfo();
1503
1557
  D_ASSERT(aux_info);
1504
1558
  auto &info = (EnumTypeInfo &)*aux_info;
1505
- D_ASSERT(info.dict_type == EnumDictType::VECTOR_DICT);
1506
- return EnumVectorDictType(info.dict_size);
1559
+ D_ASSERT(info.GetEnumDictType() == EnumDictType::VECTOR_DICT);
1560
+ return EnumVectorDictType(info.GetDictSize());
1507
1561
  }
1508
1562
 
1509
1563
  //===--------------------------------------------------------------------===//
@@ -1549,20 +1603,40 @@ shared_ptr<ExtraTypeInfo> ExtraTypeInfo::Deserialize(FieldReader &reader) {
1549
1603
  extra_info = UserTypeInfo::Deserialize(reader);
1550
1604
  break;
1551
1605
  case ExtraTypeInfoType::ENUM_TYPE_INFO: {
1552
- auto enum_size = reader.ReadRequired<uint32_t>();
1553
- auto enum_internal_type = EnumVectorDictType(enum_size);
1554
- switch (enum_internal_type) {
1555
- case PhysicalType::UINT8:
1556
- extra_info = EnumTypeInfoTemplated<uint8_t>::Deserialize(reader, enum_size);
1557
- break;
1558
- case PhysicalType::UINT16:
1559
- extra_info = EnumTypeInfoTemplated<uint16_t>::Deserialize(reader, enum_size);
1560
- break;
1561
- case PhysicalType::UINT32:
1562
- extra_info = EnumTypeInfoTemplated<uint32_t>::Deserialize(reader, enum_size);
1606
+ auto schema_name = reader.ReadRequired<string>();
1607
+ auto enum_name = reader.ReadRequired<string>();
1608
+ auto deserialize_internals = reader.ReadRequired<bool>();
1609
+ if (!deserialize_internals) {
1610
+ // this means the enum should already be in the catalog.
1611
+ auto &client_context = reader.GetSource().GetContext();
1612
+ // See if the serializer has a catalog
1613
+ auto catalog = reader.GetSource().GetCatalog();
1614
+ if (catalog) {
1615
+ auto enum_type = catalog->GetType(client_context, schema_name, enum_name, true);
1616
+ if (enum_type != LogicalType::INVALID) {
1617
+ extra_info = enum_type.GetAuxInfoShrPtr();
1618
+ }
1619
+ }
1620
+ if (!extra_info) {
1621
+ throw InternalException("Could not find ENUM in the Catalog to deserialize");
1622
+ }
1563
1623
  break;
1564
- default:
1565
- throw InternalException("Invalid Physical Type for ENUMs");
1624
+ } else {
1625
+ auto enum_size = reader.ReadRequired<uint32_t>();
1626
+ auto enum_internal_type = EnumVectorDictType(enum_size);
1627
+ switch (enum_internal_type) {
1628
+ case PhysicalType::UINT8:
1629
+ extra_info = EnumTypeInfoTemplated<uint8_t>::Deserialize(reader, enum_size, enum_name);
1630
+ break;
1631
+ case PhysicalType::UINT16:
1632
+ extra_info = EnumTypeInfoTemplated<uint16_t>::Deserialize(reader, enum_size, enum_name);
1633
+ break;
1634
+ case PhysicalType::UINT32:
1635
+ extra_info = EnumTypeInfoTemplated<uint32_t>::Deserialize(reader, enum_size, enum_name);
1636
+ break;
1637
+ default:
1638
+ throw InternalException("Invalid Physical Type for ENUMs");
1639
+ }
1566
1640
  }
1567
1641
  } break;
1568
1642
  case ExtraTypeInfoType::AGGREGATE_STATE_TYPE_INFO:
@@ -1592,6 +1666,15 @@ void LogicalType::Serialize(Serializer &serializer) const {
1592
1666
  writer.Finalize();
1593
1667
  }
1594
1668
 
1669
+ void LogicalType::SerializeEnumType(Serializer &serializer) const {
1670
+ FieldWriter writer(serializer);
1671
+ writer.WriteField<LogicalTypeId>(id_);
1672
+ writer.WriteField<ExtraTypeInfoType>(type_info_->type);
1673
+ EnumType::Serialize(writer, *type_info_, true);
1674
+ writer.WriteString(type_info_->alias);
1675
+ writer.Finalize();
1676
+ }
1677
+
1595
1678
  LogicalType LogicalType::Deserialize(Deserializer &source) {
1596
1679
  FieldReader reader(source);
1597
1680
  auto id = reader.ReadRequired<LogicalTypeId>();