duckdb 1.1.4-dev13.0 → 1.1.4-dev14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/core_functions/function_list.cpp +1 -0
  4. package/src/duckdb/extension/core_functions/include/core_functions/scalar/map_functions.hpp +9 -0
  5. package/src/duckdb/extension/core_functions/scalar/date/current.cpp +1 -0
  6. package/src/duckdb/extension/core_functions/scalar/generic/can_implicitly_cast.cpp +2 -2
  7. package/src/duckdb/extension/core_functions/scalar/generic/typeof.cpp +1 -1
  8. package/src/duckdb/extension/core_functions/scalar/list/flatten.cpp +91 -61
  9. package/src/duckdb/extension/core_functions/scalar/map/map_extract.cpp +89 -8
  10. package/src/duckdb/extension/icu/icu-current.cpp +63 -0
  11. package/src/duckdb/extension/icu/icu-makedate.cpp +43 -39
  12. package/src/duckdb/extension/icu/icu-timezone.cpp +63 -63
  13. package/src/duckdb/extension/icu/icu_extension.cpp +2 -0
  14. package/src/duckdb/extension/icu/include/icu-casts.hpp +39 -0
  15. package/src/duckdb/extension/icu/include/icu-current.hpp +17 -0
  16. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  17. package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -1
  18. package/src/duckdb/extension/parquet/column_writer.cpp +26 -18
  19. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +0 -6
  20. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +15 -1
  21. package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
  22. package/src/duckdb/extension/parquet/parquet_extension.cpp +67 -15
  23. package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -3
  24. package/src/duckdb/extension/parquet/parquet_writer.cpp +5 -6
  25. package/src/duckdb/src/catalog/catalog.cpp +21 -8
  26. package/src/duckdb/src/catalog/catalog_search_path.cpp +17 -1
  27. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  28. package/src/duckdb/src/catalog/default/default_functions.cpp +0 -3
  29. package/src/duckdb/src/catalog/dependency_list.cpp +7 -0
  30. package/src/duckdb/src/common/adbc/adbc.cpp +1 -56
  31. package/src/duckdb/src/common/arrow/arrow_converter.cpp +3 -2
  32. package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +58 -28
  33. package/src/duckdb/src/common/arrow/schema_metadata.cpp +1 -1
  34. package/src/duckdb/src/common/compressed_file_system.cpp +6 -2
  35. package/src/duckdb/src/common/enum_util.cpp +26 -22
  36. package/src/duckdb/src/common/error_data.cpp +3 -2
  37. package/src/duckdb/src/common/gzip_file_system.cpp +8 -8
  38. package/src/duckdb/src/common/local_file_system.cpp +2 -2
  39. package/src/duckdb/src/common/multi_file_reader.cpp +1 -1
  40. package/src/duckdb/src/common/random_engine.cpp +4 -1
  41. package/src/duckdb/src/common/serializer/memory_stream.cpp +23 -19
  42. package/src/duckdb/src/common/serializer/serializer.cpp +1 -1
  43. package/src/duckdb/src/common/types/bit.cpp +1 -1
  44. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +0 -5
  45. package/src/duckdb/src/common/types/column/column_data_collection.cpp +4 -1
  46. package/src/duckdb/src/common/types/data_chunk.cpp +2 -1
  47. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +0 -4
  48. package/src/duckdb/src/common/types.cpp +1 -1
  49. package/src/duckdb/src/execution/index/art/art.cpp +52 -42
  50. package/src/duckdb/src/execution/index/art/leaf.cpp +4 -9
  51. package/src/duckdb/src/execution/index/art/node.cpp +13 -13
  52. package/src/duckdb/src/execution/index/art/prefix.cpp +21 -16
  53. package/src/duckdb/src/execution/index/bound_index.cpp +6 -8
  54. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +39 -34
  55. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +2 -1
  56. package/src/duckdb/src/execution/index/unbound_index.cpp +10 -0
  57. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +62 -44
  58. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +26 -0
  59. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +69 -40
  60. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +3 -7
  61. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +11 -5
  62. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +4 -0
  63. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +8 -8
  64. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +36 -12
  65. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +12 -9
  66. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +0 -1
  67. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +29 -1
  68. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -10
  69. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +58 -35
  70. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +2 -1
  71. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +9 -4
  72. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +7 -6
  73. package/src/duckdb/src/function/compression_config.cpp +4 -0
  74. package/src/duckdb/src/function/function_binder.cpp +1 -1
  75. package/src/duckdb/src/function/scalar/system/write_log.cpp +2 -2
  76. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +15 -2
  77. package/src/duckdb/src/function/table/arrow_conversion.cpp +10 -10
  78. package/src/duckdb/src/function/table/copy_csv.cpp +8 -5
  79. package/src/duckdb/src/function/table/read_csv.cpp +21 -4
  80. package/src/duckdb/src/function/table/sniff_csv.cpp +7 -0
  81. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +4 -0
  82. package/src/duckdb/src/function/table/system/duckdb_secret_types.cpp +71 -0
  83. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  84. package/src/duckdb/src/function/table/table_scan.cpp +120 -36
  85. package/src/duckdb/src/function/table/version/pragma_version.cpp +4 -4
  86. package/src/duckdb/src/function/window/window_aggregate_function.cpp +6 -1
  87. package/src/duckdb/src/function/window/window_boundaries_state.cpp +135 -11
  88. package/src/duckdb/src/function/window/window_segment_tree.cpp +50 -22
  89. package/src/duckdb/src/function/window/window_token_tree.cpp +4 -3
  90. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +4 -0
  91. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +2 -0
  92. package/src/duckdb/src/include/duckdb/catalog/dependency_list.hpp +1 -0
  93. package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +4 -2
  94. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -8
  95. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +0 -2
  96. package/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +8 -3
  97. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +6 -1
  98. package/src/duckdb/src/include/duckdb/common/serializer/serialization_data.hpp +25 -0
  99. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +9 -3
  100. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
  101. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +11 -14
  102. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +5 -4
  103. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +21 -10
  104. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +6 -5
  105. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +37 -32
  106. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +36 -1
  107. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +3 -0
  108. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +2 -0
  109. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +5 -5
  110. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +5 -30
  111. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +7 -1
  112. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +3 -3
  113. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +1 -0
  114. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  115. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +2 -2
  116. package/src/duckdb/src/include/duckdb/logging/logger.hpp +40 -119
  117. package/src/duckdb/src/include/duckdb/logging/logging.hpp +0 -2
  118. package/src/duckdb/src/include/duckdb/main/config.hpp +5 -0
  119. package/src/duckdb/src/include/duckdb/main/connection.hpp +0 -8
  120. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -1
  121. package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
  122. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +11 -7
  123. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -0
  124. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +2 -0
  125. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +3 -0
  126. package/src/duckdb/src/include/duckdb/main/settings.hpp +10 -0
  127. package/src/duckdb/src/include/duckdb/parser/constraint.hpp +9 -0
  128. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +36 -9
  129. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +2 -1
  130. package/src/duckdb/src/include/duckdb/parser/query_node/set_operation_node.hpp +8 -2
  131. package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -0
  132. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +9 -1
  133. package/src/duckdb/src/include/duckdb/planner/filter/constant_filter.hpp +1 -0
  134. package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +0 -2
  135. package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +4 -4
  136. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +1 -1
  137. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +14 -10
  138. package/src/duckdb/src/include/duckdb/storage/index_storage_info.hpp +4 -0
  139. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +6 -1
  140. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +7 -2
  141. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +9 -0
  142. package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +2 -0
  143. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +4 -3
  144. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +2 -0
  145. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +6 -4
  146. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +1 -1
  147. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +2 -0
  148. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -0
  149. package/src/duckdb/src/include/duckdb/transaction/meta_transaction.hpp +1 -1
  150. package/src/duckdb/src/logging/logger.cpp +8 -66
  151. package/src/duckdb/src/main/attached_database.cpp +3 -1
  152. package/src/duckdb/src/main/client_context.cpp +4 -2
  153. package/src/duckdb/src/main/config.cpp +20 -2
  154. package/src/duckdb/src/main/connection.cpp +2 -29
  155. package/src/duckdb/src/main/connection_manager.cpp +5 -3
  156. package/src/duckdb/src/main/database.cpp +2 -2
  157. package/src/duckdb/src/main/extension/extension_helper.cpp +4 -5
  158. package/src/duckdb/src/main/extension/extension_install.cpp +23 -10
  159. package/src/duckdb/src/main/extension/extension_load.cpp +6 -7
  160. package/src/duckdb/src/main/extension.cpp +27 -9
  161. package/src/duckdb/src/main/secret/secret_manager.cpp +11 -0
  162. package/src/duckdb/src/main/settings/custom_settings.cpp +44 -0
  163. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +6 -0
  164. package/src/duckdb/src/optimizer/filter_combiner.cpp +13 -3
  165. package/src/duckdb/src/optimizer/filter_pushdown.cpp +33 -6
  166. package/src/duckdb/src/optimizer/late_materialization.cpp +14 -3
  167. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +0 -3
  168. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +5 -1
  169. package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +6 -3
  170. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +49 -0
  171. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +1 -0
  172. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +50 -12
  173. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +7 -5
  174. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +1 -0
  175. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +2 -2
  176. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +12 -2
  177. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +0 -1
  178. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +55 -39
  179. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +2 -1
  180. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +15 -7
  181. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +13 -8
  182. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +8 -3
  183. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +17 -1
  184. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +1 -0
  185. package/src/duckdb/src/planner/filter/conjunction_filter.cpp +1 -0
  186. package/src/duckdb/src/planner/filter/constant_filter.cpp +21 -0
  187. package/src/duckdb/src/planner/filter/in_filter.cpp +4 -7
  188. package/src/duckdb/src/planner/logical_operator.cpp +5 -3
  189. package/src/duckdb/src/planner/planner.cpp +1 -1
  190. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +2 -0
  191. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +3 -4
  192. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -5
  193. package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +4 -4
  194. package/src/duckdb/src/storage/compression/fsst.cpp +2 -2
  195. package/src/duckdb/src/storage/compression/roaring/common.cpp +10 -1
  196. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +11 -6
  197. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +4 -0
  198. package/src/duckdb/src/storage/compression/zstd.cpp +6 -0
  199. package/src/duckdb/src/storage/data_table.cpp +104 -109
  200. package/src/duckdb/src/storage/local_storage.cpp +8 -6
  201. package/src/duckdb/src/storage/magic_bytes.cpp +1 -1
  202. package/src/duckdb/src/storage/serialization/serialize_dependency.cpp +3 -3
  203. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +3 -3
  204. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +7 -5
  205. package/src/duckdb/src/storage/single_file_block_manager.cpp +95 -28
  206. package/src/duckdb/src/storage/storage_info.cpp +38 -0
  207. package/src/duckdb/src/storage/storage_manager.cpp +11 -0
  208. package/src/duckdb/src/storage/table/column_data.cpp +4 -0
  209. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -3
  210. package/src/duckdb/src/storage/table/row_group_collection.cpp +67 -68
  211. package/src/duckdb/src/storage/table/table_statistics.cpp +4 -4
  212. package/src/duckdb/src/storage/table_index_list.cpp +41 -15
  213. package/src/duckdb/src/storage/wal_replay.cpp +3 -1
  214. package/src/duckdb/src/storage/write_ahead_log.cpp +11 -4
  215. package/src/duckdb/src/transaction/meta_transaction.cpp +1 -1
  216. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
  217. package/src/duckdb/third_party/httplib/httplib.hpp +0 -1
  218. package/src/duckdb/third_party/re2/util/logging.h +10 -10
  219. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
@@ -662,7 +662,9 @@ static double CalculateTypeSimilarity(const LogicalType &merged, const LogicalTy
  }
 
  // Only maps and structs can be merged into a map
- D_ASSERT(type.id() == LogicalTypeId::STRUCT);
+ if (type.id() != LogicalTypeId::STRUCT) {
+ return -1;
+ }
  return CalculateMapAndStructSimilarity(merged, type, false, max_depth, depth);
  }
  case LogicalTypeId::LIST: {
@@ -498,6 +498,7 @@ void BasicColumnWriter::BeginWrite(ColumnWriterState &state_p) {
  hdr.data_page_header.repetition_level_encoding = Encoding::RLE;
 
  write_info.temp_writer = make_uniq<MemoryStream>(
+ Allocator::Get(writer.GetContext()),
  MaxValue<idx_t>(NextPowerOfTwo(page_info.estimated_page_size), MemoryStream::DEFAULT_INITIAL_CAPACITY));
  write_info.write_count = page_info.empty_count;
  write_info.max_write_count = page_info.row_count;
@@ -717,6 +718,7 @@ void BasicColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
  column_chunk.meta_data.total_compressed_size =
  UnsafeNumericCast<int64_t>(column_writer.GetTotalWritten() - start_offset);
  column_chunk.meta_data.total_uncompressed_size = UnsafeNumericCast<int64_t>(total_uncompressed_size);
+ state.row_group.total_byte_size += column_chunk.meta_data.total_uncompressed_size;
 
  if (state.bloom_filter) {
  writer.BufferBloomFilter(state.col_idx, std::move(state.bloom_filter));
@@ -1173,7 +1175,7 @@ void WriteValue(DlbaEncoder &encoder, WriteStream &writer, const string_t &value
 
  // helpers to get size from strings
  template <class SRC>
- static constexpr idx_t GetDlbaStringSize(const SRC &src_value) {
+ static idx_t GetDlbaStringSize(const SRC &src_value) {
  return 0;
  }
 
@@ -1311,21 +1313,26 @@ public:
 
  auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
  if (state.dictionary.size() == 0 || state.dictionary.size() > writer.DictionarySizeLimit()) {
- // If we aren't doing dictionary encoding, the following encodings are virtually always better than PLAIN
- switch (type) {
- case Type::type::INT32:
- case Type::type::INT64:
- state.encoding = Encoding::DELTA_BINARY_PACKED;
- break;
- case Type::type::BYTE_ARRAY:
- state.encoding = Encoding::DELTA_LENGTH_BYTE_ARRAY;
- break;
- case Type::type::FLOAT:
- case Type::type::DOUBLE:
- state.encoding = Encoding::BYTE_STREAM_SPLIT;
- break;
- default:
+ if (writer.GetParquetVersion() == ParquetVersion::V1) {
+ // Can't do the cool stuff for V1
  state.encoding = Encoding::PLAIN;
+ } else {
+ // If we aren't doing dictionary encoding, these encodings are virtually always better than PLAIN
+ switch (type) {
+ case Type::type::INT32:
+ case Type::type::INT64:
+ state.encoding = Encoding::DELTA_BINARY_PACKED;
+ break;
+ case Type::type::BYTE_ARRAY:
+ state.encoding = Encoding::DELTA_LENGTH_BYTE_ARRAY;
+ break;
+ case Type::type::FLOAT:
+ case Type::type::DOUBLE:
+ state.encoding = Encoding::BYTE_STREAM_SPLIT;
+ break;
+ default:
+ state.encoding = Encoding::PLAIN;
+ }
  }
  state.dictionary.clear();
  }
@@ -1463,8 +1470,9 @@ public:
  make_uniq<ParquetBloomFilter>(state.dictionary.size(), writer.BloomFilterFalsePositiveRatio());
 
  // first write the contents of the dictionary page to a temporary buffer
- auto temp_writer = make_uniq<MemoryStream>(MaxValue<idx_t>(
- NextPowerOfTwo(state.dictionary.size() * sizeof(TGT)), MemoryStream::DEFAULT_INITIAL_CAPACITY));
+ auto temp_writer = make_uniq<MemoryStream>(
+ Allocator::Get(writer.GetContext()), MaxValue<idx_t>(NextPowerOfTwo(state.dictionary.size() * sizeof(TGT)),
+ MemoryStream::DEFAULT_INITIAL_CAPACITY));
  for (idx_t r = 0; r < values.size(); r++) {
  const TGT target_value = OP::template Operation<SRC, TGT>(values[r]);
  // update the statistics
@@ -1838,7 +1846,7 @@ public:
  auto enum_count = EnumType::GetSize(enum_type);
  auto string_values = FlatVector::GetData<string_t>(enum_values);
  // first write the contents of the dictionary page to a temporary buffer
- auto temp_writer = make_uniq<MemoryStream>();
+ auto temp_writer = make_uniq<MemoryStream>(Allocator::Get(writer.GetContext()));
  for (idx_t r = 0; r < enum_count; r++) {
  D_ASSERT(!FlatVector::IsNull(enum_values, r));
  // update the statistics
@@ -216,12 +216,6 @@ private:
  void PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx);
  LogicalType DeriveLogicalType(const SchemaElement &s_ele);
 
- template <typename... Args>
- std::runtime_error FormatException(const string fmt_str, Args... params) {
- return std::runtime_error("Failed to read Parquet file \"" + file_name +
- "\": " + StringUtil::Format(fmt_str, params...));
- }
-
  private:
  unique_ptr<FileHandle> file_handle;
  };
@@ -68,13 +68,19 @@ struct ParquetBloomFilterEntry {
  idx_t column_idx;
  };
 
+ enum class ParquetVersion : uint8_t {
+ V1 = 1, //! Excludes DELTA_BINARY_PACKED, DELTA_LENGTH_BYTE_ARRAY, BYTE_STREAM_SPLIT
+ V2 = 2, //! Includes the encodings above
+ };
+
  class ParquetWriter {
  public:
  ParquetWriter(ClientContext &context, FileSystem &fs, string file_name, vector<LogicalType> types,
  vector<string> names, duckdb_parquet::CompressionCodec::type codec, ChildFieldIDs field_ids,
  const vector<pair<string, string>> &kv_metadata,
  shared_ptr<ParquetEncryptionConfig> encryption_config, idx_t dictionary_size_limit,
- double bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl);
+ double bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl,
+ ParquetVersion parquet_version);
 
  public:
  void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result);
@@ -85,6 +91,9 @@ public:
  static duckdb_parquet::Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type);
  static void SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::SchemaElement &schema_ele);
 
+ ClientContext &GetContext() {
+ return context;
+ }
  duckdb_apache::thrift::protocol::TProtocol *GetProtocol() {
  return protocol.get();
  }
@@ -117,6 +126,9 @@ public:
  lock_guard<mutex> glock(lock);
  return file_meta_data.row_groups.size();
  }
+ ParquetVersion GetParquetVersion() const {
+ return parquet_version;
+ }
 
  uint32_t Write(const duckdb_apache::thrift::TBase &object);
  uint32_t WriteData(const const_data_ptr_t buffer, const uint32_t buffer_size);
@@ -129,6 +141,7 @@ public:
  void BufferBloomFilter(idx_t col_idx, unique_ptr<ParquetBloomFilter> bloom_filter);
 
  private:
+ ClientContext &context;
  string file_name;
  vector<LogicalType> sql_types;
  vector<string> column_names;
@@ -140,6 +153,7 @@ private:
  int64_t compression_level;
  bool debug_use_openssl;
  shared_ptr<EncryptionUtil> encryption_util;
+ ParquetVersion parquet_version;
 
  unique_ptr<BufferedFileWriter> writer;
  std::shared_ptr<duckdb_apache::thrift::protocol::TProtocol> protocol;
@@ -98,6 +98,7 @@ public:
  }
  if (new_size > alloc_len) {
  alloc_len = NextPowerOfTwo(new_size);
+ allocated_data.Reset(); // Have to reset before allocating new buffer (otherwise we use ~2x the memory)
  allocated_data = allocator.Allocate(alloc_len);
  ptr = allocated_data.get();
  }
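Note: the allocated_data.Reset() added above drops the old buffer before the replacement is allocated, so a resize peaks at roughly the new size rather than old + new. A minimal standalone sketch of the same pattern (plain C++ with illustrative names, not DuckDB's Allocator/AllocatedData API):

#include <cstddef>
#include <memory>

struct GrowableBuffer {
	std::unique_ptr<char[]> data;
	std::size_t capacity = 0;

	void Resize(std::size_t new_size) {
		if (new_size <= capacity) {
			return;
		}
		// Free the old allocation first; otherwise both buffers are live at once
		// and peak memory is roughly capacity + new_size instead of new_size.
		data.reset();
		data = std::make_unique<char[]>(new_size);
		capacity = new_size;
	}
};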
@@ -203,6 +203,9 @@ struct ParquetWriteBindData : public TableFunctionData {
  ChildFieldIDs field_ids;
  //! The compression level, higher value is more
  int64_t compression_level = ZStdFileSystem::DefaultCompressionLevel();
+
+ //! Which encodings to include when writing
+ ParquetVersion parquet_version = ParquetVersion::V1;
  };
 
  struct ParquetWriteGlobalState : public GlobalFunctionData {
@@ -371,6 +374,7 @@ public:
  table_function.named_parameters["explicit_cardinality"] = LogicalType::UBIGINT;
  table_function.named_parameters["schema"] = LogicalTypeId::ANY;
  table_function.named_parameters["encryption_config"] = LogicalTypeId::ANY;
+ table_function.named_parameters["parquet_version"] = LogicalType::VARCHAR;
  table_function.get_partition_data = ParquetScanGetPartitionData;
  table_function.serialize = ParquetScanSerialize;
  table_function.deserialize = ParquetScanDeserialize;
@@ -1289,6 +1293,15 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyFunctionBi
  }
  bind_data->compression_level = val;
  compression_level_set = true;
+ } else if (loption == "parquet_version") {
+ const auto roption = StringUtil::Upper(option.second[0].ToString());
+ if (roption == "V1") {
+ bind_data->parquet_version = ParquetVersion::V1;
+ } else if (roption == "V2") {
+ bind_data->parquet_version = ParquetVersion::V2;
+ } else {
+ throw BinderException("Expected parquet_version 'V1' or 'V2'");
+ }
  } else {
  throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
  }
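The parquet_version option added above is surfaced as a Parquet COPY option; a hedged usage sketch through DuckDB's C++ API (the table name and output path are hypothetical, and the option spelling follows the bind code above):

#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr); // in-memory database
	duckdb::Connection con(db);
	con.Query("CREATE TABLE tbl AS SELECT range AS i FROM range(1000)");
	// 'V2' opts in to DELTA_BINARY_PACKED / DELTA_LENGTH_BYTE_ARRAY / BYTE_STREAM_SPLIT;
	// the default stays 'V1', which falls back to PLAIN when dictionary encoding is not used.
	auto res = con.Query("COPY tbl TO 'tbl.parquet' (FORMAT parquet, PARQUET_VERSION 'V2')");
	if (res->HasError()) {
		res->Print();
	}
	return 0;
}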
@@ -1319,7 +1332,7 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
  context, fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec,
  parquet_bind.field_ids.Copy(), parquet_bind.kv_metadata, parquet_bind.encryption_config,
  parquet_bind.dictionary_size_limit, parquet_bind.bloom_filter_false_positive_ratio,
- parquet_bind.compression_level, parquet_bind.debug_use_openssl);
+ parquet_bind.compression_level, parquet_bind.debug_use_openssl, parquet_bind.parquet_version);
  return std::move(global_state);
  }
 
@@ -1424,6 +1437,29 @@ duckdb_parquet::CompressionCodec::type EnumUtil::FromString<duckdb_parquet::Comp
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
  }
 
+ template <>
+ const char *EnumUtil::ToChars<ParquetVersion>(ParquetVersion value) {
+ switch (value) {
+ case ParquetVersion::V1:
+ return "V1";
+ case ParquetVersion::V2:
+ return "V2";
+ default:
+ throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+ }
+ }
+
+ template <>
+ ParquetVersion EnumUtil::FromString<ParquetVersion>(const char *value) {
+ if (StringUtil::Equals(value, "V1")) {
+ return ParquetVersion::V1;
+ }
+ if (StringUtil::Equals(value, "V2")) {
+ return ParquetVersion::V2;
+ }
+ throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+ }
+
  static optional_idx SerializeCompressionLevel(const int64_t compression_level) {
  return compression_level < 0 ? NumericLimits<idx_t>::Maximum() - NumericCast<idx_t>(AbsValue(compression_level))
  : NumericCast<idx_t>(compression_level);
@@ -1455,13 +1491,25 @@ static void ParquetCopySerialize(Serializer &serializer, const FunctionData &bin
  bind_data.encryption_config, nullptr);
 
  // 108 was dictionary_compression_ratio_threshold, but was deleted
+
+ // To avoid doubly defining the default values in both ParquetWriteBindData and here,
+ // and possibly making a mistake, we just get the values from ParquetWriteBindData.
+ // We have to std::move them, otherwise MSVC will complain that it's not a "const T &&"
  const auto compression_level = SerializeCompressionLevel(bind_data.compression_level);
  D_ASSERT(DeserializeCompressionLevel(compression_level) == bind_data.compression_level);
- serializer.WritePropertyWithDefault<optional_idx>(109, "compression_level", compression_level);
- serializer.WriteProperty(110, "row_groups_per_file", bind_data.row_groups_per_file);
- serializer.WriteProperty(111, "debug_use_openssl", bind_data.debug_use_openssl);
- serializer.WriteProperty(112, "dictionary_size_limit", bind_data.dictionary_size_limit);
- serializer.WriteProperty(113, "bloom_filter_false_positive_ratio", bind_data.bloom_filter_false_positive_ratio);
+ ParquetWriteBindData default_value;
+ serializer.WritePropertyWithDefault(109, "compression_level", compression_level);
+ serializer.WritePropertyWithDefault(110, "row_groups_per_file", bind_data.row_groups_per_file,
+ default_value.row_groups_per_file);
+ serializer.WritePropertyWithDefault(111, "debug_use_openssl", bind_data.debug_use_openssl,
+ default_value.debug_use_openssl);
+ serializer.WritePropertyWithDefault(112, "dictionary_size_limit", bind_data.dictionary_size_limit,
+ default_value.dictionary_size_limit);
+ serializer.WritePropertyWithDefault(113, "bloom_filter_false_positive_ratio",
+ bind_data.bloom_filter_false_positive_ratio,
+ default_value.bloom_filter_false_positive_ratio);
+ serializer.WritePropertyWithDefault(114, "parquet_version", bind_data.parquet_version,
+ default_value.parquet_version);
  }
 
  static unique_ptr<FunctionData> ParquetCopyDeserialize(Deserializer &deserializer, CopyFunction &function) {
@@ -1473,21 +1521,25 @@ static unique_ptr<FunctionData> ParquetCopyDeserialize(Deserializer &deserialize
  data->row_group_size_bytes = deserializer.ReadProperty<idx_t>(104, "row_group_size_bytes");
  data->kv_metadata = deserializer.ReadProperty<vector<pair<string, string>>>(105, "kv_metadata");
  data->field_ids = deserializer.ReadProperty<ChildFieldIDs>(106, "field_ids");
- deserializer.ReadPropertyWithExplicitDefault<shared_ptr<ParquetEncryptionConfig>>(107, "encryption_config",
- data->encryption_config, nullptr);
+ deserializer.ReadPropertyWithExplicitDefault<shared_ptr<ParquetEncryptionConfig>>(
+ 107, "encryption_config", data->encryption_config, std::move(ParquetWriteBindData().encryption_config));
  deserializer.ReadDeletedProperty<double>(108, "dictionary_compression_ratio_threshold");
 
  optional_idx compression_level;
  deserializer.ReadPropertyWithDefault<optional_idx>(109, "compression_level", compression_level);
  data->compression_level = DeserializeCompressionLevel(compression_level);
  D_ASSERT(SerializeCompressionLevel(data->compression_level) == compression_level);
- data->row_groups_per_file =
- deserializer.ReadPropertyWithExplicitDefault<optional_idx>(110, "row_groups_per_file", optional_idx::Invalid());
- data->debug_use_openssl = deserializer.ReadPropertyWithExplicitDefault<bool>(111, "debug_use_openssl", true);
- data->dictionary_size_limit =
- deserializer.ReadPropertyWithExplicitDefault<idx_t>(112, "dictionary_size_limit", data->row_group_size / 10);
- data->bloom_filter_false_positive_ratio =
- deserializer.ReadPropertyWithExplicitDefault<double>(113, "bloom_filter_false_positive_ratio", 0.01);
+ ParquetWriteBindData default_value;
+ data->row_groups_per_file = deserializer.ReadPropertyWithExplicitDefault<optional_idx>(
+ 110, "row_groups_per_file", default_value.row_groups_per_file);
+ data->debug_use_openssl =
+ deserializer.ReadPropertyWithExplicitDefault<bool>(111, "debug_use_openssl", default_value.debug_use_openssl);
+ data->dictionary_size_limit = deserializer.ReadPropertyWithExplicitDefault<idx_t>(
+ 112, "dictionary_size_limit", default_value.dictionary_size_limit);
+ data->bloom_filter_false_positive_ratio = deserializer.ReadPropertyWithExplicitDefault<double>(
+ 113, "bloom_filter_false_positive_ratio", default_value.bloom_filter_false_positive_ratio);
+ data->parquet_version =
+ deserializer.ReadPropertyWithExplicitDefault(114, "parquet_version", default_value.parquet_version);
 
  return std::move(data);
  }
@@ -492,7 +492,8 @@ void ParquetReader::InitializeSchema(ClientContext &context) {
  }
  // check if we like this schema
  if (file_meta_data->schema.size() < 2) {
- throw FormatException("Need at least one non-root column in the file");
+ throw InvalidInputException("Failed to read Parquet file '%s': Need at least one non-root column in the file",
+ file_name);
  }
  root_reader = CreateReader(context);
  auto &root_type = root_reader->Type();
@@ -778,12 +779,13 @@ void ParquetReader::PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t c
  FilterPropagateResult prune_result;
  // TODO we might not have stats but STILL a bloom filter so move this up
  // check the bloom filter if present
- if (!column_reader.Type().IsNested() &&
+ bool is_generated_column = column_reader.FileIdx() >= group.columns.size();
+ if (!column_reader.Type().IsNested() && !is_generated_column &&
  ParquetStatisticsUtils::BloomFilterSupported(column_reader.Type().id()) &&
  ParquetStatisticsUtils::BloomFilterExcludes(filter, group.columns[column_reader.FileIdx()].meta_data,
  *state.thrift_file_proto, allocator)) {
  prune_result = FilterPropagateResult::FILTER_ALWAYS_FALSE;
- } else if (column_reader.Type().id() == LogicalTypeId::VARCHAR &&
+ } else if (column_reader.Type().id() == LogicalTypeId::VARCHAR && !is_generated_column &&
  group.columns[column_reader.FileIdx()].meta_data.statistics.__isset.min_value &&
  group.columns[column_reader.FileIdx()].meta_data.statistics.__isset.max_value) {
 
@@ -321,12 +321,12 @@ ParquetWriter::ParquetWriter(ClientContext &context, FileSystem &fs, string file
  const vector<pair<string, string>> &kv_metadata,
  shared_ptr<ParquetEncryptionConfig> encryption_config_p, idx_t dictionary_size_limit_p,
  double bloom_filter_false_positive_ratio_p, int64_t compression_level_p,
- bool debug_use_openssl_p)
- : file_name(std::move(file_name_p)), sql_types(std::move(types_p)), column_names(std::move(names_p)), codec(codec),
- field_ids(std::move(field_ids_p)), encryption_config(std::move(encryption_config_p)),
- dictionary_size_limit(dictionary_size_limit_p),
+ bool debug_use_openssl_p, ParquetVersion parquet_version)
+ : context(context), file_name(std::move(file_name_p)), sql_types(std::move(types_p)),
+ column_names(std::move(names_p)), codec(codec), field_ids(std::move(field_ids_p)),
+ encryption_config(std::move(encryption_config_p)), dictionary_size_limit(dictionary_size_limit_p),
  bloom_filter_false_positive_ratio(bloom_filter_false_positive_ratio_p), compression_level(compression_level_p),
- debug_use_openssl(debug_use_openssl_p) {
+ debug_use_openssl(debug_use_openssl_p), parquet_version(parquet_version) {
 
  // initialize the file writer
  writer = make_uniq<BufferedFileWriter>(fs, file_name.c_str(),
@@ -395,7 +395,6 @@ void ParquetWriter::PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGro
  // set up a new row group for this chunk collection
  auto &row_group = result.row_group;
  row_group.num_rows = NumericCast<int64_t>(buffer.Count());
- row_group.total_byte_size = NumericCast<int64_t>(buffer.SizeInBytes());
  row_group.__isset.file_offset = true;
 
  auto &states = result.states;
@@ -426,7 +426,12 @@ vector<CatalogSearchEntry> GetCatalogEntries(CatalogEntryRetriever &retriever, c
  entries.emplace_back(catalog, schema_name);
  }
  if (entries.empty()) {
- entries.emplace_back(catalog, DEFAULT_SCHEMA);
+ auto catalog_entry = Catalog::GetCatalogEntry(context, catalog);
+ if (catalog_entry) {
+ entries.emplace_back(catalog, catalog_entry->GetDefaultSchema());
+ } else {
+ entries.emplace_back(catalog, DEFAULT_SCHEMA);
+ }
  }
  } else {
  // specific catalog and schema provided
@@ -687,7 +692,7 @@ CatalogException Catalog::CreateMissingEntryException(CatalogEntryRetriever &ret
  // however, if there is an exact match in another schema, we will always show it
  static constexpr const double UNSEEN_PENALTY = 0.2;
  auto unseen_entries = SimilarEntriesInSchemas(context, entry_name, type, unseen_schemas);
- vector<string> suggestions;
+ set<string> suggestions;
  if (!unseen_entries.empty() && (unseen_entries[0].score == 1.0 || unseen_entries[0].score - UNSEEN_PENALTY >
  (entries.empty() ? 0.0 : entries[0].score))) {
  // the closest matching entry requires qualification as it is not in the default search path
@@ -698,19 +703,19 @@ CatalogException Catalog::CreateMissingEntryException(CatalogEntryRetriever &ret
  bool qualify_database;
  bool qualify_schema;
  FindMinimalQualification(retriever, catalog_name, schema_name, qualify_database, qualify_schema);
- suggestions.push_back(unseen_entry.GetQualifiedName(qualify_database, qualify_schema));
+ auto qualified_name = unseen_entry.GetQualifiedName(qualify_database, qualify_schema);
+ suggestions.insert(qualified_name);
  }
  } else if (!entries.empty()) {
  for (auto &entry : entries) {
- suggestions.push_back(entry.name);
+ suggestions.insert(entry.name);
  }
  }
 
  string did_you_mean;
- std::sort(suggestions.begin(), suggestions.end());
  if (suggestions.size() > 2) {
- auto last = suggestions.back();
- suggestions.pop_back();
+ string last = *suggestions.rbegin();
+ suggestions.erase(last);
  did_you_mean = StringUtil::Join(suggestions, ", ") + ", or " + last;
  } else {
  did_you_mean = StringUtil::Join(suggestions, " or ");
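Switching suggestions from vector<string> plus std::sort to set<string> above keeps the "did you mean" candidates ordered and also deduplicates them; a small standalone illustration of that behaviour (hypothetical entry names, not DuckDB code):

#include <iostream>
#include <set>
#include <string>

int main() {
	std::set<std::string> suggestions;
	// A std::set stores unique elements in sorted order, so duplicates collapse
	// automatically and no separate std::sort pass is needed.
	suggestions.insert("memory.main.tbl");
	suggestions.insert("memory.main.tbl"); // duplicate, ignored
	suggestions.insert("aliases");
	for (const auto &s : suggestions) {
		std::cout << s << '\n'; // prints "aliases" then "memory.main.tbl"
	}
	return 0;
}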
@@ -968,12 +973,16 @@ optional_ptr<SchemaCatalogEntry> Catalog::GetSchema(CatalogEntryRetriever &retri
  // skip if it is not an attached database
  continue;
  }
- auto on_not_found = i + 1 == entries.size() ? if_not_found : OnEntryNotFound::RETURN_NULL;
+ const auto on_not_found = i + 1 == entries.size() ? if_not_found : OnEntryNotFound::RETURN_NULL;
  auto result = catalog->GetSchema(retriever.GetContext(), schema_name, on_not_found, error_context);
  if (result) {
  return result;
  }
  }
+ // Catalog has not been found.
+ if (if_not_found == OnEntryNotFound::THROW_EXCEPTION) {
+ throw CatalogException(error_context, "Catalog with name %s does not exist!", catalog_name);
+ }
  return nullptr;
  }
 
@@ -1073,6 +1082,10 @@ optional_ptr<DependencyManager> Catalog::GetDependencyManager() {
  return nullptr;
  }
 
+ string Catalog::GetDefaultSchema() const {
+ return DEFAULT_SCHEMA;
+ }
+
  //! Whether this catalog has a default table. Catalogs with a default table can be queries by their catalog name
  bool Catalog::HasDefaultTable() const {
  return !default_table.empty();
@@ -165,7 +165,7 @@ void CatalogSearchPath::Set(vector<CatalogSearchEntry> new_paths, CatalogSetPath
  if (path.catalog.empty()) {
  auto catalog = Catalog::GetCatalogEntry(context, path.schema);
  if (catalog) {
- auto schema = catalog->GetSchema(context, DEFAULT_SCHEMA, OnEntryNotFound::RETURN_NULL);
+ auto schema = catalog->GetSchema(context, catalog->GetDefaultSchema(), OnEntryNotFound::RETURN_NULL);
  if (schema) {
  path.catalog = std::move(path.schema);
  path.schema = schema->name;
@@ -205,6 +205,22 @@ string CatalogSearchPath::GetDefaultSchema(const string &catalog) {
  return DEFAULT_SCHEMA;
  }
 
+ string CatalogSearchPath::GetDefaultSchema(ClientContext &context, const string &catalog) {
+ for (auto &path : paths) {
+ if (path.catalog == TEMP_CATALOG) {
+ continue;
+ }
+ if (StringUtil::CIEquals(path.catalog, catalog)) {
+ return path.schema;
+ }
+ }
+ auto catalog_entry = Catalog::GetCatalogEntry(context, catalog);
+ if (catalog_entry) {
+ return catalog_entry->GetDefaultSchema();
+ }
+ return DEFAULT_SCHEMA;
+ }
+
  string CatalogSearchPath::GetDefaultCatalog(const string &schema) {
  if (DefaultSchemaGenerator::IsDefaultSchema(schema)) {
  return SYSTEM_CATALOG;
@@ -353,7 +353,7 @@ bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name,
  // push the old entry in the undo buffer for this transaction
  if (transaction.transaction) {
  // serialize the AlterInfo into a temporary buffer
- MemoryStream stream;
+ MemoryStream stream(Allocator::Get(*transaction.db));
  BinarySerializer serializer(stream);
  serializer.Begin();
  serializer.WriteProperty(100, "column_name", alter_info.GetColumnName());
@@ -163,9 +163,6 @@ static const DefaultMacro internal_macros[] = {
 
  // date functions
  {DEFAULT_SCHEMA, "date_add", {"date", "interval", nullptr}, {{nullptr, nullptr}}, "date + interval"},
- {DEFAULT_SCHEMA, "current_date", {nullptr}, {{nullptr, nullptr}}, "current_timestamp::DATE"},
- {DEFAULT_SCHEMA, "today", {nullptr}, {{nullptr, nullptr}}, "current_timestamp::DATE"},
- {DEFAULT_SCHEMA, "get_current_time", {nullptr}, {{nullptr, nullptr}}, "current_timestamp::TIMETZ"},
 
  // regexp functions
  {DEFAULT_SCHEMA, "regexp_split_to_table", {"text", "pattern", nullptr}, {{nullptr, nullptr}}, "unnest(string_split_regex(text, pattern))"},
@@ -61,6 +61,13 @@ LogicalDependency::LogicalDependency(CatalogEntry &entry) {
  }
  }
 
+ LogicalDependency::LogicalDependency(optional_ptr<Catalog> catalog_p, CatalogEntryInfo entry_p, string catalog_str)
+ : entry(std::move(entry_p)), catalog(std::move(catalog_str)) {
+ if (catalog_p) {
+ catalog = catalog_p->GetName();
+ }
+ }
+
  bool LogicalDependency::operator==(const LogicalDependency &other) const {
  return other.entry.name == entry.name && other.entry.schema == entry.schema && other.entry.type == entry.type;
  }
@@ -53,7 +53,6 @@ AdbcStatusCode duckdb_adbc_init(int version, void *driver, struct AdbcError *err
  adbc_driver->ConnectionGetInfo = duckdb_adbc::ConnectionGetInfo;
  adbc_driver->StatementGetParameterSchema = duckdb_adbc::StatementGetParameterSchema;
  adbc_driver->ConnectionGetTableSchema = duckdb_adbc::ConnectionGetTableSchema;
- adbc_driver->StatementSetSubstraitPlan = duckdb_adbc::StatementSetSubstraitPlan;
  return ADBC_STATUS_OK;
  }
 
@@ -70,7 +69,6 @@ struct DuckDBAdbcStatementWrapper {
  ArrowArrayStream ingestion_stream;
  IngestionMode ingestion_mode = IngestionMode::CREATE;
  bool temporary_table = false;
- uint8_t *substrait_plan;
  uint64_t plan_length;
  };
 
@@ -157,36 +155,6 @@ AdbcStatusCode DatabaseNew(struct AdbcDatabase *database, struct AdbcError *erro
  return CheckResult(res, error, "Failed to allocate");
  }
 
- AdbcStatusCode StatementSetSubstraitPlan(struct AdbcStatement *statement, const uint8_t *plan, size_t length,
- struct AdbcError *error) {
- if (!statement) {
- SetError(error, "Statement is not set");
- return ADBC_STATUS_INVALID_ARGUMENT;
- }
- if (!plan) {
- SetError(error, "Substrait Plan is not set");
- return ADBC_STATUS_INVALID_ARGUMENT;
- }
- if (length == 0) {
- SetError(error, "Can't execute plan with size = 0");
- return ADBC_STATUS_INVALID_ARGUMENT;
- }
- auto wrapper = static_cast<DuckDBAdbcStatementWrapper *>(statement->private_data);
- if (wrapper->ingestion_stream.release) {
- // Release any resources currently held by the ingestion stream before we overwrite it
- wrapper->ingestion_stream.release(&wrapper->ingestion_stream);
- wrapper->ingestion_stream.release = nullptr;
- }
- if (wrapper->statement) {
- duckdb_destroy_prepare(&wrapper->statement);
- wrapper->statement = nullptr;
- }
- wrapper->substrait_plan = static_cast<uint8_t *>(malloc(sizeof(uint8_t) * length));
- wrapper->plan_length = length;
- memcpy(wrapper->substrait_plan, plan, length);
- return ADBC_STATUS_OK;
- }
-
  AdbcStatusCode DatabaseSetOption(struct AdbcDatabase *database, const char *key, const char *value,
  struct AdbcError *error) {
  if (!database) {
@@ -677,7 +645,6 @@ AdbcStatusCode StatementNew(struct AdbcConnection *connection, struct AdbcStatem
  statement_wrapper->ingestion_stream.release = nullptr;
  statement_wrapper->ingestion_table_name = nullptr;
  statement_wrapper->db_schema = nullptr;
- statement_wrapper->substrait_plan = nullptr;
  statement_wrapper->temporary_table = false;
 
  statement_wrapper->ingestion_mode = IngestionMode::CREATE;
@@ -709,10 +676,6 @@ AdbcStatusCode StatementRelease(struct AdbcStatement *statement, struct AdbcErro
  free(wrapper->db_schema);
  wrapper->db_schema = nullptr;
  }
- if (wrapper->substrait_plan) {
- free(wrapper->substrait_plan);
- wrapper->substrait_plan = nullptr;
- }
  free(statement->private_data);
  statement->private_data = nullptr;
  return ADBC_STATUS_OK;
@@ -805,25 +768,7 @@ AdbcStatusCode StatementExecuteQuery(struct AdbcStatement *statement, struct Arr
  if (has_stream && to_table) {
  return IngestToTableFromBoundStream(wrapper, error);
  }
- if (wrapper->substrait_plan != nullptr) {
- auto plan_str = std::string(reinterpret_cast<const char *>(wrapper->substrait_plan), wrapper->plan_length);
- duckdb::vector<duckdb::Value> params;
- params.emplace_back(duckdb::Value::BLOB_RAW(plan_str));
- duckdb::unique_ptr<duckdb::QueryResult> query_result;
- try {
- query_result = reinterpret_cast<duckdb::Connection *>(wrapper->connection)
- ->TableFunction("from_substrait", params)
- ->Execute();
- } catch (duckdb::Exception &e) {
- std::string error_msg = "It was not possible to execute substrait query. " + std::string(e.what());
- SetError(error, error_msg);
- return ADBC_STATUS_INVALID_ARGUMENT;
- }
- auto arrow_wrapper = new duckdb::ArrowResultWrapper();
- arrow_wrapper->result =
- duckdb::unique_ptr_cast<duckdb::QueryResult, duckdb::MaterializedQueryResult>(std::move(query_result));
- wrapper->result = reinterpret_cast<duckdb_arrow>(arrow_wrapper);
- } else if (has_stream) {
+ if (has_stream) {
  // A stream was bound to the statement, use that to bind parameters
  duckdb::unique_ptr<duckdb::QueryResult> result;
  ArrowArrayStream stream = wrapper->ingestion_stream;
@@ -73,14 +73,15 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
  InitializeChild(root_holder.nested_children.back()[0], root_holder);
  child.children = &root_holder.nested_children_ptr.back()[0];
  child.children[0]->name = "entries";
+ child.children[0]->flags = 0; // Set the 'entries' field to non-nullable
  SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), options, context);
  }
 
  bool SetArrowExtension(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
  ClientContext &context) {
  auto &config = DBConfig::GetConfig(context);
- if (config.HasArrowExtension(type.id())) {
- auto arrow_extension = config.GetArrowExtension(type.id());
+ if (config.HasArrowExtension(type)) {
+ auto arrow_extension = config.GetArrowExtension(type);
  arrow_extension.PopulateArrowSchema(root_holder, child, type, context, arrow_extension);
  return true;
  }