duckdb 0.7.2-dev717.0 → 0.7.2-dev832.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/binding.gyp +2 -0
  2. package/lib/duckdb.d.ts +12 -1
  3. package/lib/duckdb.js +19 -0
  4. package/package.json +1 -1
  5. package/src/duckdb/extension/json/include/json_common.hpp +1 -0
  6. package/src/duckdb/extension/json/include/json_functions.hpp +1 -0
  7. package/src/duckdb/extension/json/include/json_serializer.hpp +77 -0
  8. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +147 -0
  9. package/src/duckdb/extension/json/json_functions.cpp +1 -0
  10. package/src/duckdb/extension/json/json_scan.cpp +2 -2
  11. package/src/duckdb/extension/json/json_serializer.cpp +217 -0
  12. package/src/duckdb/src/common/enums/expression_type.cpp +8 -222
  13. package/src/duckdb/src/common/enums/join_type.cpp +3 -22
  14. package/src/duckdb/src/common/exception.cpp +2 -2
  15. package/src/duckdb/src/common/serializer/enum_serializer.cpp +1172 -0
  16. package/src/duckdb/src/common/types/value.cpp +117 -0
  17. package/src/duckdb/src/common/types/vector.cpp +140 -1
  18. package/src/duckdb/src/common/types.cpp +166 -89
  19. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +243 -0
  20. package/src/duckdb/src/function/scalar/string/regexp/regexp_util.cpp +79 -0
  21. package/src/duckdb/src/function/scalar/string/regexp.cpp +21 -80
  22. package/src/duckdb/src/function/table/arrow_conversion.cpp +7 -1
  23. package/src/duckdb/src/function/table/table_scan.cpp +1 -1
  24. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  25. package/src/duckdb/src/include/duckdb/common/enums/aggregate_handling.hpp +2 -0
  26. package/src/duckdb/src/include/duckdb/common/enums/expression_type.hpp +2 -3
  27. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -0
  28. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +2 -0
  29. package/src/duckdb/src/include/duckdb/common/enums/set_operation_type.hpp +2 -1
  30. package/src/duckdb/src/include/duckdb/common/exception.hpp +40 -9
  31. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +3 -0
  32. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +113 -0
  33. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +336 -0
  34. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +268 -0
  35. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +126 -0
  36. package/src/duckdb/src/include/duckdb/common/string_util.hpp +12 -0
  37. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -0
  38. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -0
  39. package/src/duckdb/src/include/duckdb/common/types.hpp +8 -2
  40. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +81 -1
  41. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  42. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  43. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  44. package/src/duckdb/src/include/duckdb/parser/expression/bound_expression.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/parser/expression/case_expression.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +2 -0
  47. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +2 -0
  48. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +2 -0
  49. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -0
  50. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +2 -0
  51. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  52. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  53. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -0
  54. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +2 -0
  55. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +2 -0
  56. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +2 -0
  57. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +2 -0
  58. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +2 -0
  59. package/src/duckdb/src/include/duckdb/parser/expression/subquery_expression.hpp +2 -0
  60. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +5 -0
  61. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +2 -0
  62. package/src/duckdb/src/include/duckdb/parser/parsed_expression.hpp +5 -0
  63. package/src/duckdb/src/include/duckdb/parser/query_node/recursive_cte_node.hpp +3 -0
  64. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +5 -0
  65. package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +3 -0
  66. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +11 -1
  67. package/src/duckdb/src/include/duckdb/parser/result_modifier.hpp +24 -1
  68. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -1
  69. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +6 -1
  70. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +4 -0
  71. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +2 -0
  72. package/src/duckdb/src/include/duckdb/parser/tableref/expressionlistref.hpp +3 -0
  73. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +3 -0
  74. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +9 -0
  75. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  76. package/src/duckdb/src/include/duckdb/parser/tableref/table_function_ref.hpp +3 -0
  77. package/src/duckdb/src/include/duckdb/parser/tableref.hpp +3 -1
  78. package/src/duckdb/src/main/extension/extension_install.cpp +7 -2
  79. package/src/duckdb/src/optimizer/deliminator.cpp +1 -1
  80. package/src/duckdb/src/optimizer/filter_combiner.cpp +1 -1
  81. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +3 -3
  82. package/src/duckdb/src/optimizer/rule/move_constants.cpp +2 -2
  83. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +1 -1
  84. package/src/duckdb/src/parser/common_table_expression_info.cpp +19 -0
  85. package/src/duckdb/src/parser/expression/between_expression.cpp +17 -0
  86. package/src/duckdb/src/parser/expression/case_expression.cpp +28 -0
  87. package/src/duckdb/src/parser/expression/cast_expression.cpp +17 -0
  88. package/src/duckdb/src/parser/expression/collate_expression.cpp +16 -0
  89. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -0
  90. package/src/duckdb/src/parser/expression/comparison_expression.cpp +16 -0
  91. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +15 -0
  92. package/src/duckdb/src/parser/expression/constant_expression.cpp +14 -0
  93. package/src/duckdb/src/parser/expression/default_expression.cpp +7 -0
  94. package/src/duckdb/src/parser/expression/function_expression.cpp +35 -0
  95. package/src/duckdb/src/parser/expression/lambda_expression.cpp +16 -0
  96. package/src/duckdb/src/parser/expression/operator_expression.cpp +15 -0
  97. package/src/duckdb/src/parser/expression/parameter_expression.cpp +15 -0
  98. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +14 -0
  99. package/src/duckdb/src/parser/expression/star_expression.cpp +20 -0
  100. package/src/duckdb/src/parser/expression/subquery_expression.cpp +20 -0
  101. package/src/duckdb/src/parser/expression/window_expression.cpp +43 -0
  102. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +22 -10
  103. package/src/duckdb/src/parser/parsed_expression.cpp +72 -0
  104. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +21 -0
  105. package/src/duckdb/src/parser/query_node/select_node.cpp +31 -0
  106. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +17 -0
  107. package/src/duckdb/src/parser/query_node.cpp +50 -0
  108. package/src/duckdb/src/parser/result_modifier.cpp +78 -0
  109. package/src/duckdb/src/parser/statement/select_statement.cpp +12 -0
  110. package/src/duckdb/src/parser/tableref/basetableref.cpp +21 -0
  111. package/src/duckdb/src/parser/tableref/emptytableref.cpp +4 -0
  112. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +17 -0
  113. package/src/duckdb/src/parser/tableref/joinref.cpp +25 -0
  114. package/src/duckdb/src/parser/tableref/pivotref.cpp +53 -0
  115. package/src/duckdb/src/parser/tableref/subqueryref.cpp +15 -0
  116. package/src/duckdb/src/parser/tableref/table_function.cpp +17 -0
  117. package/src/duckdb/src/parser/tableref.cpp +46 -0
  118. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +1 -1
  119. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  120. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +1 -1
  121. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +4 -0
  122. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -1
  123. package/src/duckdb/src/planner/expression/bound_expression.cpp +4 -0
  124. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +0 -1
  125. package/src/duckdb/third_party/re2/re2/re2.cc +9 -0
  126. package/src/duckdb/third_party/re2/re2/re2.h +2 -0
  127. package/src/duckdb/ub_extension_json_json_functions.cpp +2 -0
  128. package/src/duckdb/ub_src_common_serializer.cpp +2 -0
  129. package/src/duckdb/ub_src_function_scalar_string_regexp.cpp +4 -0
  130. package/src/duckdb/ub_src_parser.cpp +2 -0
  131. package/src/utils.cpp +12 -0
  132. package/test/extension.test.ts +44 -26
@@ -30,6 +30,9 @@
30
30
  #include "duckdb/function/cast/cast_function_set.hpp"
31
31
  #include "duckdb/main/error_manager.hpp"
32
32
 
33
+ #include "duckdb/common/serializer/format_serializer.hpp"
34
+ #include "duckdb/common/serializer/format_deserializer.hpp"
35
+
33
36
  #include <utility>
34
37
  #include <cmath>
35
38
 
@@ -1800,6 +1803,120 @@ Value Value::Deserialize(Deserializer &main_source) {
1800
1803
  return new_value;
1801
1804
  }
1802
1805
 
1806
+ void Value::FormatSerialize(FormatSerializer &serializer) const {
1807
+ serializer.WriteProperty("type", type_);
1808
+ serializer.WriteProperty("is_null", is_null);
1809
+ if (!IsNull()) {
1810
+ switch (type_.InternalType()) {
1811
+ case PhysicalType::BOOL:
1812
+ serializer.WriteProperty("value", value_.boolean);
1813
+ break;
1814
+ case PhysicalType::INT8:
1815
+ serializer.WriteProperty("value", value_.tinyint);
1816
+ break;
1817
+ case PhysicalType::INT16:
1818
+ serializer.WriteProperty("value", value_.smallint);
1819
+ break;
1820
+ case PhysicalType::INT32:
1821
+ serializer.WriteProperty("value", value_.integer);
1822
+ break;
1823
+ case PhysicalType::INT64:
1824
+ serializer.WriteProperty("value", value_.bigint);
1825
+ break;
1826
+ case PhysicalType::UINT8:
1827
+ serializer.WriteProperty("value", value_.utinyint);
1828
+ break;
1829
+ case PhysicalType::UINT16:
1830
+ serializer.WriteProperty("value", value_.usmallint);
1831
+ break;
1832
+ case PhysicalType::UINT32:
1833
+ serializer.WriteProperty("value", value_.uinteger);
1834
+ break;
1835
+ case PhysicalType::UINT64:
1836
+ serializer.WriteProperty("value", value_.ubigint);
1837
+ break;
1838
+ case PhysicalType::INT128:
1839
+ serializer.WriteProperty("value", value_.hugeint);
1840
+ break;
1841
+ case PhysicalType::FLOAT:
1842
+ serializer.WriteProperty("value", value_.float_);
1843
+ break;
1844
+ case PhysicalType::DOUBLE:
1845
+ serializer.WriteProperty("value", value_.double_);
1846
+ break;
1847
+ case PhysicalType::INTERVAL:
1848
+ serializer.WriteProperty("value", value_.interval);
1849
+ break;
1850
+ case PhysicalType::VARCHAR:
1851
+ serializer.WriteProperty("value", StringValue::Get(*this));
1852
+ break;
1853
+ default: {
1854
+ Vector v(*this);
1855
+ v.FormatSerialize(serializer, 1);
1856
+ break;
1857
+ }
1858
+ }
1859
+ }
1860
+ }
1861
+
1862
+ Value Value::FormatDeserialize(FormatDeserializer &deserializer) {
1863
+ auto type = deserializer.ReadProperty<LogicalType>("type");
1864
+ auto is_null = deserializer.ReadProperty<bool>("is_null");
1865
+ Value new_value = Value(type);
1866
+ if (is_null) {
1867
+ return new_value;
1868
+ }
1869
+ new_value.is_null = false;
1870
+ switch (type.InternalType()) {
1871
+ case PhysicalType::BOOL:
1872
+ new_value.value_.boolean = deserializer.ReadProperty<bool>("value");
1873
+ break;
1874
+ case PhysicalType::UINT8:
1875
+ new_value.value_.utinyint = deserializer.ReadProperty<uint8_t>("value");
1876
+ break;
1877
+ case PhysicalType::INT8:
1878
+ new_value.value_.tinyint = deserializer.ReadProperty<int8_t>("value");
1879
+ break;
1880
+ case PhysicalType::UINT16:
1881
+ new_value.value_.usmallint = deserializer.ReadProperty<uint16_t>("value");
1882
+ break;
1883
+ case PhysicalType::INT16:
1884
+ new_value.value_.smallint = deserializer.ReadProperty<int16_t>("value");
1885
+ break;
1886
+ case PhysicalType::UINT32:
1887
+ new_value.value_.uinteger = deserializer.ReadProperty<uint32_t>("value");
1888
+ break;
1889
+ case PhysicalType::INT32:
1890
+ new_value.value_.integer = deserializer.ReadProperty<int32_t>("value");
1891
+ break;
1892
+ case PhysicalType::UINT64:
1893
+ new_value.value_.ubigint = deserializer.ReadProperty<uint64_t>("value");
1894
+ break;
1895
+ case PhysicalType::INT64:
1896
+ new_value.value_.bigint = deserializer.ReadProperty<int64_t>("value");
1897
+ break;
1898
+ case PhysicalType::FLOAT:
1899
+ new_value.value_.float_ = deserializer.ReadProperty<float>("value");
1900
+ break;
1901
+ case PhysicalType::DOUBLE:
1902
+ new_value.value_.double_ = deserializer.ReadProperty<double>("value");
1903
+ break;
1904
+ case PhysicalType::INTERVAL:
1905
+ new_value.value_.interval = deserializer.ReadProperty<interval_t>("value");
1906
+ break;
1907
+ case PhysicalType::VARCHAR:
1908
+ new_value.value_info_ = make_shared<StringValueInfo>(deserializer.ReadProperty<string>("value"));
1909
+ break;
1910
+ default: {
1911
+ Vector v(type);
1912
+ v.FormatDeserialize(deserializer, 1);
1913
+ new_value = v.GetValue(0);
1914
+ break;
1915
+ }
1916
+ }
1917
+ return new_value;
1918
+ }
1919
+
1803
1920
  void Value::Print() const {
1804
1921
  Printer::Print(ToString());
1805
1922
  }
@@ -18,6 +18,9 @@
18
18
  #include "duckdb/common/fsst.hpp"
19
19
  #include "fsst.h"
20
20
 
21
+ #include "duckdb/common/serializer/format_serializer.hpp"
22
+ #include "duckdb/common/serializer/format_deserializer.hpp"
23
+
21
24
  #include <cstring> // strlen() on Solaris
22
25
 
23
26
  namespace duckdb {
@@ -241,7 +244,8 @@ struct DataArrays {
241
244
  idx_t type_size;
242
245
  bool is_nested;
243
246
  DataArrays(Vector &vec, data_ptr_t data, VectorBuffer *buffer, idx_t type_size, bool is_nested)
244
- : vec(vec), data(data), buffer(buffer), type_size(type_size), is_nested(is_nested) {};
247
+ : vec(vec), data(data), buffer(buffer), type_size(type_size), is_nested(is_nested) {
248
+ }
245
249
  };
246
250
 
247
251
  void FindChildren(std::vector<DataArrays> &to_resize, VectorBuffer &auxiliary) {
@@ -962,6 +966,141 @@ void Vector::Serialize(idx_t count, Serializer &serializer) {
962
966
  }
963
967
  }
964
968
 
969
+ void Vector::FormatSerialize(FormatSerializer &serializer, idx_t count) {
970
+ auto &type = GetType();
971
+
972
+ UnifiedVectorFormat vdata;
973
+ ToUnifiedFormat(count, vdata);
974
+
975
+ const auto write_validity = (count > 0) && !vdata.validity.AllValid();
976
+ serializer.WriteProperty("has_validity", write_validity);
977
+ if (write_validity) {
978
+ ValidityMask flat_mask(count);
979
+ for (idx_t i = 0; i < count; ++i) {
980
+ auto row_idx = vdata.sel->get_index(i);
981
+ flat_mask.Set(i, vdata.validity.RowIsValid(row_idx));
982
+ }
983
+ serializer.WriteProperty("validity_mask", (const_data_ptr_t)flat_mask.GetData(),
984
+ flat_mask.ValidityMaskSize(count));
985
+ }
986
+ if (TypeIsConstantSize(type.InternalType())) {
987
+ // constant size type: simple copy
988
+ idx_t write_size = GetTypeIdSize(type.InternalType()) * count;
989
+ auto ptr = unique_ptr<data_t[]>(new data_t[write_size]);
990
+ VectorOperations::WriteToStorage(*this, count, ptr.get());
991
+ serializer.WriteProperty("data", write_size);
992
+ } else {
993
+ switch (type.InternalType()) {
994
+ case PhysicalType::VARCHAR: {
995
+ auto strings = (string_t *)vdata.data;
996
+ for (idx_t i = 0; i < count; i++) {
997
+ auto idx = vdata.sel->get_index(i);
998
+ auto source = !vdata.validity.RowIsValid(idx) ? NullValue<string_t>() : strings[idx];
999
+ string_t str = string_t(source.GetDataUnsafe(), source.GetSize());
1000
+ serializer.WriteProperty("data", str);
1001
+ }
1002
+ break;
1003
+ }
1004
+ case PhysicalType::STRUCT: {
1005
+ Flatten(count);
1006
+ auto &entries = StructVector::GetEntries(*this);
1007
+ for (auto &entry : entries) {
1008
+ entry->FormatSerialize(serializer, count);
1009
+ }
1010
+ break;
1011
+ }
1012
+ case PhysicalType::LIST: {
1013
+ auto &child = ListVector::GetEntry(*this);
1014
+ auto list_size = ListVector::GetListSize(*this);
1015
+
1016
+ // serialize the list entries in a flat array
1017
+ auto data = unique_ptr<list_entry_t[]>(new list_entry_t[count]);
1018
+ auto source_array = (list_entry_t *)vdata.data;
1019
+ for (idx_t i = 0; i < count; i++) {
1020
+ auto idx = vdata.sel->get_index(i);
1021
+ auto source = source_array[idx];
1022
+ data[i].offset = source.offset;
1023
+ data[i].length = source.length;
1024
+ }
1025
+
1026
+ // write the list size
1027
+ serializer.WriteProperty("list_size", list_size);
1028
+ serializer.WriteProperty("data", (data_ptr_t)data.get(), count * sizeof(list_entry_t));
1029
+ child.FormatSerialize(serializer, list_size);
1030
+ break;
1031
+ }
1032
+ default:
1033
+ throw InternalException("Unimplemented variable width type for Vector::Serialize!");
1034
+ }
1035
+ }
1036
+ }
1037
+
1038
+ void Vector::FormatDeserialize(FormatDeserializer &deserializer, idx_t count) {
1039
+ /*
1040
+ auto &type = GetType();
1041
+
1042
+ auto &validity = FlatVector::Validity(*this);
1043
+ validity.Reset();
1044
+ const auto has_validity = deserializer.ReadProperty<bool>("has_validity");
1045
+ if (has_validity) {
1046
+ validity.Initialize(count);
1047
+ source.ReadData((data_ptr_t)validity.GetData(), validity.ValidityMaskSize(count));
1048
+ }
1049
+
1050
+ if (TypeIsConstantSize(type.InternalType())) {
1051
+ // constant size type: read fixed amount of data from
1052
+ auto column_size = GetTypeIdSize(type.InternalType()) * count;
1053
+ auto ptr = unique_ptr<data_t[]>(new data_t[column_size]);
1054
+ source.ReadData(ptr.get(), column_size);
1055
+
1056
+ VectorOperations::ReadFromStorage(ptr.get(), count, *this);
1057
+ } else {
1058
+ switch (type.InternalType()) {
1059
+ case PhysicalType::VARCHAR: {
1060
+ auto strings = FlatVector::GetData<string_t>(*this);
1061
+ for (idx_t i = 0; i < count; i++) {
1062
+ // read the strings
1063
+ auto str = source.Read<string>();
1064
+ // now add the string to the StringHeap of the vector
1065
+ // and write the pointer into the vector
1066
+ if (validity.RowIsValid(i)) {
1067
+ strings[i] = StringVector::AddStringOrBlob(*this, str);
1068
+ }
1069
+ }
1070
+ break;
1071
+ }
1072
+ case PhysicalType::STRUCT: {
1073
+ auto &entries = StructVector::GetEntries(*this);
1074
+ for (auto &entry : entries) {
1075
+ entry->FormatDeserialize(deserializer, count);
1076
+ }
1077
+ break;
1078
+ }
1079
+ case PhysicalType::LIST: {
1080
+ // read the list size
1081
+ auto list_size = deserializer.ReadProperty<idx_t>("list_size");
1082
+ ListVector::Reserve(*this, list_size);
1083
+ ListVector::SetListSize(*this, list_size);
1084
+
1085
+ // read the list entry
1086
+ auto list_entries = FlatVector::GetData(*this);
1087
+ source.ReadData(list_entries, count * sizeof(list_entry_t));
1088
+
1089
+ // deserialize the child vector
1090
+ auto &child = ListVector::GetEntry(*this);
1091
+ child.Deserialize(list_size, source);
1092
+
1093
+ break;
1094
+ }
1095
+ default:
1096
+ throw InternalException("Unimplemented variable width type for Vector::Deserialize!");
1097
+ }
1098
+ }
1099
+ */
1100
+
1101
+ throw NotImplementedException("TODO: Implement deserialization for DuckDB Vectors");
1102
+ }
1103
+
965
1104
  void Vector::Deserialize(idx_t count, Deserializer &source) {
966
1105
  auto &type = GetType();
967
1106
 
@@ -27,6 +27,10 @@
27
27
  #include "duckdb/parser/keyword_helper.hpp"
28
28
  #include "duckdb/parser/parser.hpp"
29
29
 
30
+ #include "duckdb/common/serializer/format_deserializer.hpp"
31
+ #include "duckdb/common/serializer/enum_serializer.hpp"
32
+ #include "duckdb/common/serializer/format_serializer.hpp"
33
+
30
34
  #include <cmath>
31
35
 
32
36
  namespace duckdb {
@@ -327,95 +331,8 @@ bool TypeIsInteger(PhysicalType type) {
327
331
  return (type >= PhysicalType::UINT8 && type <= PhysicalType::INT64) || type == PhysicalType::INT128;
328
332
  }
329
333
 
330
- // LCOV_EXCL_START
331
334
  string LogicalTypeIdToString(LogicalTypeId id) {
332
- switch (id) {
333
- case LogicalTypeId::BOOLEAN:
334
- return "BOOLEAN";
335
- case LogicalTypeId::TINYINT:
336
- return "TINYINT";
337
- case LogicalTypeId::SMALLINT:
338
- return "SMALLINT";
339
- case LogicalTypeId::INTEGER:
340
- return "INTEGER";
341
- case LogicalTypeId::BIGINT:
342
- return "BIGINT";
343
- case LogicalTypeId::HUGEINT:
344
- return "HUGEINT";
345
- case LogicalTypeId::UUID:
346
- return "UUID";
347
- case LogicalTypeId::UTINYINT:
348
- return "UTINYINT";
349
- case LogicalTypeId::USMALLINT:
350
- return "USMALLINT";
351
- case LogicalTypeId::UINTEGER:
352
- return "UINTEGER";
353
- case LogicalTypeId::UBIGINT:
354
- return "UBIGINT";
355
- case LogicalTypeId::DATE:
356
- return "DATE";
357
- case LogicalTypeId::TIME:
358
- return "TIME";
359
- case LogicalTypeId::TIMESTAMP:
360
- return "TIMESTAMP";
361
- case LogicalTypeId::TIMESTAMP_MS:
362
- return "TIMESTAMP_MS";
363
- case LogicalTypeId::TIMESTAMP_NS:
364
- return "TIMESTAMP_NS";
365
- case LogicalTypeId::TIMESTAMP_SEC:
366
- return "TIMESTAMP_S";
367
- case LogicalTypeId::TIMESTAMP_TZ:
368
- return "TIMESTAMP WITH TIME ZONE";
369
- case LogicalTypeId::TIME_TZ:
370
- return "TIME WITH TIME ZONE";
371
- case LogicalTypeId::FLOAT:
372
- return "FLOAT";
373
- case LogicalTypeId::DOUBLE:
374
- return "DOUBLE";
375
- case LogicalTypeId::DECIMAL:
376
- return "DECIMAL";
377
- case LogicalTypeId::VARCHAR:
378
- return "VARCHAR";
379
- case LogicalTypeId::BLOB:
380
- return "BLOB";
381
- case LogicalTypeId::CHAR:
382
- return "CHAR";
383
- case LogicalTypeId::INTERVAL:
384
- return "INTERVAL";
385
- case LogicalTypeId::SQLNULL:
386
- return "NULL";
387
- case LogicalTypeId::ANY:
388
- return "ANY";
389
- case LogicalTypeId::VALIDITY:
390
- return "VALIDITY";
391
- case LogicalTypeId::STRUCT:
392
- return "STRUCT";
393
- case LogicalTypeId::LIST:
394
- return "LIST";
395
- case LogicalTypeId::MAP:
396
- return "MAP";
397
- case LogicalTypeId::POINTER:
398
- return "POINTER";
399
- case LogicalTypeId::TABLE:
400
- return "TABLE";
401
- case LogicalTypeId::LAMBDA:
402
- return "LAMBDA";
403
- case LogicalTypeId::INVALID:
404
- return "INVALID";
405
- case LogicalTypeId::UNION:
406
- return "UNION";
407
- case LogicalTypeId::UNKNOWN:
408
- return "UNKNOWN";
409
- case LogicalTypeId::ENUM:
410
- return "ENUM";
411
- case LogicalTypeId::AGGREGATE_STATE:
412
- return "AGGREGATE_STATE";
413
- case LogicalTypeId::USER:
414
- return "USER";
415
- case LogicalTypeId::BIT:
416
- return "BIT";
417
- }
418
- return "UNDEFINED";
335
+ return EnumSerializer::EnumToString(id);
419
336
  }
420
337
 
421
338
  string LogicalType::ToString() const {
@@ -883,12 +800,16 @@ public:
883
800
  return alias == other_p->alias && EqualsInternal(other_p);
884
801
  }
885
802
  //! Serializes a ExtraTypeInfo to a stand-alone binary blob
886
- virtual void Serialize(FieldWriter &writer) const {};
803
+ virtual void Serialize(FieldWriter &writer) const {
804
+ }
887
805
  //! Serializes a ExtraTypeInfo to a stand-alone binary blob
888
806
  static void Serialize(ExtraTypeInfo *info, FieldWriter &writer);
889
807
  //! Deserializes a blob back into an ExtraTypeInfo
890
808
  static shared_ptr<ExtraTypeInfo> Deserialize(FieldReader &reader);
891
809
 
810
+ virtual void FormatSerialize(FormatSerializer &serializer) const;
811
+ static shared_ptr<ExtraTypeInfo> FormatDeserialize(FormatDeserializer &source);
812
+
892
813
  protected:
893
814
  virtual bool EqualsInternal(ExtraTypeInfo *other_p) const {
894
815
  // Do nothing
@@ -959,6 +880,18 @@ public:
959
880
  writer.WriteField<uint8_t>(scale);
960
881
  }
961
882
 
883
+ void FormatSerialize(FormatSerializer &serializer) const override {
884
+ ExtraTypeInfo::FormatSerialize(serializer);
885
+ serializer.WriteProperty("width", width);
886
+ serializer.WriteProperty("scale", scale);
887
+ }
888
+
889
+ static shared_ptr<ExtraTypeInfo> FormatDeserialize(FormatDeserializer &source) {
890
+ auto width = source.ReadProperty<uint8_t>("width");
891
+ auto scale = source.ReadProperty<uint8_t>("scale");
892
+ return make_shared<DecimalTypeInfo>(width, scale);
893
+ }
894
+
962
895
  static shared_ptr<ExtraTypeInfo> Deserialize(FieldReader &reader) {
963
896
  auto width = reader.ReadRequired<uint8_t>();
964
897
  auto scale = reader.ReadRequired<uint8_t>();
@@ -1016,6 +949,16 @@ public:
1016
949
  return make_shared<StringTypeInfo>(std::move(collation));
1017
950
  }
1018
951
 
952
+ void FormatSerialize(FormatSerializer &serializer) const override {
953
+ ExtraTypeInfo::FormatSerialize(serializer);
954
+ serializer.WriteProperty("collation", collation);
955
+ }
956
+
957
+ static shared_ptr<ExtraTypeInfo> FormatDeserialize(FormatDeserializer &source) {
958
+ auto collation = source.ReadProperty<string>("collation");
959
+ return make_shared<StringTypeInfo>(std::move(collation));
960
+ }
961
+
1019
962
  protected:
1020
963
  bool EqualsInternal(ExtraTypeInfo *other_p) const override {
1021
964
  // collation info has no impact on equality
@@ -1057,11 +1000,21 @@ public:
1057
1000
  writer.WriteSerializable(child_type);
1058
1001
  }
1059
1002
 
1003
+ void FormatSerialize(FormatSerializer &serializer) const override {
1004
+ ExtraTypeInfo::FormatSerialize(serializer);
1005
+ serializer.WriteProperty("child_type", child_type);
1006
+ }
1007
+
1060
1008
  static shared_ptr<ExtraTypeInfo> Deserialize(FieldReader &reader) {
1061
1009
  auto child_type = reader.ReadRequiredSerializable<LogicalType, LogicalType>();
1062
1010
  return make_shared<ListTypeInfo>(std::move(child_type));
1063
1011
  }
1064
1012
 
1013
+ static shared_ptr<ExtraTypeInfo> FormatDeserialize(FormatDeserializer &source) {
1014
+ auto child_type = source.ReadProperty<LogicalType>("child_type");
1015
+ return make_shared<ListTypeInfo>(std::move(child_type));
1016
+ }
1017
+
1065
1018
  protected:
1066
1019
  bool EqualsInternal(ExtraTypeInfo *other_p) const override {
1067
1020
  auto &other = (ListTypeInfo &)*other_p;
@@ -1101,6 +1054,11 @@ public:
1101
1054
  }
1102
1055
  }
1103
1056
 
1057
+ void FormatSerialize(FormatSerializer &serializer) const override {
1058
+ ExtraTypeInfo::FormatSerialize(serializer);
1059
+ serializer.WriteProperty("child_types", child_types);
1060
+ }
1061
+
1104
1062
  static shared_ptr<ExtraTypeInfo> Deserialize(FieldReader &reader) {
1105
1063
  child_list_t<LogicalType> child_list;
1106
1064
  auto child_types_size = reader.ReadRequired<uint32_t>();
@@ -1113,6 +1071,11 @@ public:
1113
1071
  return make_shared<StructTypeInfo>(std::move(child_list));
1114
1072
  }
1115
1073
 
1074
+ static shared_ptr<ExtraTypeInfo> FormatDeserialize(FormatDeserializer &deserializer) {
1075
+ auto child_types = deserializer.ReadProperty<child_list_t<LogicalType>>("child_types");
1076
+ return make_shared<StructTypeInfo>(std::move(child_types));
1077
+ }
1078
+
1116
1079
  protected:
1117
1080
  bool EqualsInternal(ExtraTypeInfo *other_p) const override {
1118
1081
  auto &other = (StructTypeInfo &)*other_p;
@@ -1138,6 +1101,21 @@ public:
1138
1101
  }
1139
1102
  }
1140
1103
 
1104
+ void FormatSerialize(FormatSerializer &serializer) const override {
1105
+ ExtraTypeInfo::FormatSerialize(serializer);
1106
+ serializer.WriteProperty("function_name", state_type.function_name);
1107
+ serializer.WriteProperty("return_type", state_type.return_type);
1108
+ serializer.WriteProperty("bound_argument_types", state_type.bound_argument_types);
1109
+ }
1110
+
1111
+ static shared_ptr<ExtraTypeInfo> FormatDeserialize(FormatDeserializer &source) {
1112
+ auto function_name = source.ReadProperty<string>("function_name");
1113
+ auto return_type = source.ReadProperty<LogicalType>("return_type");
1114
+ auto bound_argument_types = source.ReadProperty<vector<LogicalType>>("bound_argument_types");
1115
+ return make_shared<AggregateStateTypeInfo>(
1116
+ aggregate_state_t(std::move(function_name), std::move(return_type), std::move(bound_argument_types)));
1117
+ }
1118
+
1141
1119
  static shared_ptr<ExtraTypeInfo> Deserialize(FieldReader &reader) {
1142
1120
  auto &source = reader.GetSource();
1143
1121
 
@@ -1294,11 +1272,21 @@ public:
1294
1272
  writer.WriteString(user_type_name);
1295
1273
  }
1296
1274
 
1275
+ void FormatSerialize(FormatSerializer &serializer) const override {
1276
+ ExtraTypeInfo::FormatSerialize(serializer);
1277
+ serializer.WriteProperty("user_type_name", user_type_name);
1278
+ }
1279
+
1297
1280
  static shared_ptr<ExtraTypeInfo> Deserialize(FieldReader &reader) {
1298
1281
  auto enum_name = reader.ReadRequired<string>();
1299
1282
  return make_shared<UserTypeInfo>(std::move(enum_name));
1300
1283
  }
1301
1284
 
1285
+ static shared_ptr<ExtraTypeInfo> FormatDeserialize(FormatDeserializer &source) {
1286
+ auto enum_name = source.ReadProperty<string>("user_type_name");
1287
+ return make_shared<UserTypeInfo>(std::move(enum_name));
1288
+ }
1289
+
1302
1290
  protected:
1303
1291
  bool EqualsInternal(ExtraTypeInfo *other_p) const override {
1304
1292
  auto &other = (UserTypeInfo &)*other_p;
@@ -1380,6 +1368,12 @@ protected:
1380
1368
  EnumType::Serialize(writer, *this, serialize_internals);
1381
1369
  }
1382
1370
 
1371
+ void FormatSerialize(FormatSerializer &serializer) const override {
1372
+ ExtraTypeInfo::FormatSerialize(serializer);
1373
+ serializer.WriteProperty("dict_size", dict_size);
1374
+ serializer.WriteProperty("enum_name", enum_name);
1375
+ ((Vector &)values_insert_order).FormatSerialize(serializer, dict_size);
1376
+ }
1383
1377
  Vector values_insert_order;
1384
1378
 
1385
1379
  private:
@@ -1439,6 +1433,13 @@ struct EnumTypeInfoTemplated : public EnumTypeInfo {
1439
1433
  return make_shared<EnumTypeInfoTemplated>(std::move(enum_name), values_insert_order, size);
1440
1434
  }
1441
1435
 
1436
+ static shared_ptr<EnumTypeInfoTemplated> FormatDeserialize(FormatDeserializer &source, uint32_t size) {
1437
+ auto enum_name = source.ReadProperty<string>("enum_name");
1438
+ Vector values_insert_order(LogicalType::VARCHAR, size);
1439
+ values_insert_order.FormatDeserialize(source, size);
1440
+ return make_shared<EnumTypeInfoTemplated>(std::move(enum_name), values_insert_order, size);
1441
+ }
1442
+
1442
1443
  string_map_t<T> &GetValues() {
1443
1444
  return values;
1444
1445
  }
@@ -1573,6 +1574,70 @@ void ExtraTypeInfo::Serialize(ExtraTypeInfo *info, FieldWriter &writer) {
1573
1574
  writer.WriteString(info->alias);
1574
1575
  }
1575
1576
  }
1577
+ void ExtraTypeInfo::FormatSerialize(FormatSerializer &serializer) const {
1578
+ serializer.WriteProperty("type", type);
1579
+ serializer.WriteProperty("alias", alias);
1580
+ }
1581
+
1582
+ shared_ptr<ExtraTypeInfo> ExtraTypeInfo::FormatDeserialize(FormatDeserializer &deserializer) {
1583
+ auto type = deserializer.ReadProperty<ExtraTypeInfoType>("type");
1584
+
1585
+ shared_ptr<ExtraTypeInfo> result;
1586
+ switch (type) {
1587
+ case ExtraTypeInfoType::INVALID_TYPE_INFO: {
1588
+ string alias;
1589
+ deserializer.ReadOptionalProperty("alias", alias);
1590
+ if (!alias.empty()) {
1591
+ return make_shared<ExtraTypeInfo>(type, alias);
1592
+ }
1593
+ return nullptr;
1594
+ }
1595
+ case ExtraTypeInfoType::GENERIC_TYPE_INFO: {
1596
+ result = make_shared<ExtraTypeInfo>(type);
1597
+ } break;
1598
+ case ExtraTypeInfoType::DECIMAL_TYPE_INFO:
1599
+ result = DecimalTypeInfo::FormatDeserialize(deserializer);
1600
+ break;
1601
+ case ExtraTypeInfoType::STRING_TYPE_INFO:
1602
+ result = StringTypeInfo::FormatDeserialize(deserializer);
1603
+ break;
1604
+ case ExtraTypeInfoType::LIST_TYPE_INFO:
1605
+ result = ListTypeInfo::FormatDeserialize(deserializer);
1606
+ break;
1607
+ case ExtraTypeInfoType::STRUCT_TYPE_INFO:
1608
+ result = StructTypeInfo::FormatDeserialize(deserializer);
1609
+ break;
1610
+ case ExtraTypeInfoType::USER_TYPE_INFO:
1611
+ result = UserTypeInfo::FormatDeserialize(deserializer);
1612
+ break;
1613
+ case ExtraTypeInfoType::ENUM_TYPE_INFO: {
1614
+ auto enum_size = deserializer.ReadProperty<uint32_t>("enum_size");
1615
+ auto enum_internal_type = EnumVectorDictType(enum_size);
1616
+ switch (enum_internal_type) {
1617
+ case PhysicalType::UINT8:
1618
+ result = EnumTypeInfoTemplated<uint8_t>::FormatDeserialize(deserializer, enum_size);
1619
+ break;
1620
+ case PhysicalType::UINT16:
1621
+ result = EnumTypeInfoTemplated<uint16_t>::FormatDeserialize(deserializer, enum_size);
1622
+ break;
1623
+ case PhysicalType::UINT32:
1624
+ result = EnumTypeInfoTemplated<uint32_t>::FormatDeserialize(deserializer, enum_size);
1625
+ break;
1626
+ default:
1627
+ throw InternalException("Invalid Physical Type for ENUMs");
1628
+ }
1629
+ } break;
1630
+ case ExtraTypeInfoType::AGGREGATE_STATE_TYPE_INFO:
1631
+ result = AggregateStateTypeInfo::FormatDeserialize(deserializer);
1632
+ break;
1633
+
1634
+ default:
1635
+ throw InternalException("Unimplemented type info in ExtraTypeInfo::Deserialize");
1636
+ }
1637
+ deserializer.ReadOptionalPropertyOrDefault("alias", result->alias, string());
1638
+ return result;
1639
+ }
1640
+
1576
1641
  shared_ptr<ExtraTypeInfo> ExtraTypeInfo::Deserialize(FieldReader &reader) {
1577
1642
  auto type = reader.ReadRequired<ExtraTypeInfoType>();
1578
1643
  shared_ptr<ExtraTypeInfo> extra_info;
@@ -1684,6 +1749,18 @@ LogicalType LogicalType::Deserialize(Deserializer &source) {
1684
1749
  return LogicalType(id, std::move(info));
1685
1750
  }
1686
1751
 
1752
+ void LogicalType::FormatSerialize(FormatSerializer &serializer) const {
1753
+ serializer.WriteProperty("id", id_);
1754
+ serializer.WriteOptionalProperty("type_info", type_info_.get());
1755
+ }
1756
+
1757
+ LogicalType LogicalType::FormatDeserialize(FormatDeserializer &deserializer) {
1758
+ auto id = deserializer.ReadProperty<LogicalTypeId>("id");
1759
+ auto info = deserializer.ReadOptionalProperty<shared_ptr<ExtraTypeInfo>>("type_info");
1760
+
1761
+ return LogicalType(id, std::move(info));
1762
+ }
1763
+
1687
1764
  bool LogicalType::EqualTypeInfo(const LogicalType &rhs) const {
1688
1765
  if (type_info_.get() == rhs.type_info_.get()) {
1689
1766
  return true;