duckdb 1.1.2-dev2.0 → 1.1.2-dev6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (289):
  1. package/binding.gyp +4 -2
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/third_party/icu/common/putil.cpp +0 -5
  4. package/src/duckdb/extension/icu/third_party/icu/common/rbbiscan.cpp +1 -1
  5. package/src/duckdb/extension/icu/third_party/icu/common/rbbitblb.cpp +1 -1
  6. package/src/duckdb/extension/icu/third_party/icu/common/ucurr.cpp +1 -1
  7. package/src/duckdb/extension/icu/third_party/icu/common/uresbund.cpp +1 -1
  8. package/src/duckdb/extension/icu/third_party/icu/common/uresimp.h +31 -31
  9. package/src/duckdb/extension/icu/third_party/icu/common/ustring.cpp +1 -1
  10. package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
  11. package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +12 -12
  12. package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
  13. package/src/duckdb/extension/icu/third_party/icu/i18n/listformatter.cpp +4 -4
  14. package/src/duckdb/extension/icu/third_party/icu/i18n/number_decimalquantity.h +1 -1
  15. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
  16. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +28 -28
  17. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +7 -7
  18. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/ucol.h +1 -1
  19. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/ucoleitr.h +41 -41
  20. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/umsg.h +41 -41
  21. package/src/duckdb/extension/icu/third_party/icu/i18n/usrchimp.h +3 -3
  22. package/src/duckdb/extension/json/include/json_common.hpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_structure.cpp +13 -7
  24. package/src/duckdb/extension/parquet/column_writer.cpp +2 -1
  25. package/src/duckdb/extension/parquet/geo_parquet.cpp +24 -9
  26. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +3 -1
  27. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -0
  28. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -1
  29. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +0 -4
  30. package/src/duckdb/extension/parquet/parquet_extension.cpp +20 -6
  31. package/src/duckdb/extension/parquet/parquet_reader.cpp +1 -2
  32. package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -1
  33. package/src/duckdb/extension/parquet/serialize_parquet.cpp +0 -2
  34. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -1
  35. package/src/duckdb/src/catalog/default/default_functions.cpp +5 -5
  36. package/src/duckdb/src/common/allocator.cpp +3 -2
  37. package/src/duckdb/src/common/arrow/arrow_appender.cpp +1 -0
  38. package/src/duckdb/src/common/arrow/arrow_converter.cpp +11 -0
  39. package/src/duckdb/src/common/arrow/schema_metadata.cpp +6 -4
  40. package/src/duckdb/src/common/enum_util.cpp +33 -0
  41. package/src/duckdb/src/common/exception.cpp +3 -0
  42. package/src/duckdb/src/common/extra_type_info.cpp +1 -44
  43. package/src/duckdb/src/common/field_writer.cpp +97 -0
  44. package/src/duckdb/src/common/render_tree.cpp +7 -5
  45. package/src/duckdb/src/common/row_operations/row_match.cpp +359 -0
  46. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +27 -0
  47. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +36 -0
  48. package/src/duckdb/src/common/serializer/format_serializer.cpp +15 -0
  49. package/src/duckdb/src/common/serializer.cpp +24 -0
  50. package/src/duckdb/src/common/sort/comparators.cpp +2 -2
  51. package/src/duckdb/src/common/types/bit.cpp +57 -34
  52. package/src/duckdb/src/common/types/data_chunk.cpp +32 -29
  53. package/src/duckdb/src/common/types/vector_cache.cpp +12 -6
  54. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +14 -0
  55. package/src/duckdb/src/core_functions/aggregate/distributive/bitstring_agg.cpp +20 -1
  56. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -2
  57. package/src/duckdb/src/core_functions/aggregate/holistic/approx_top_k.cpp +32 -7
  58. package/src/duckdb/src/core_functions/function_list.cpp +1 -2
  59. package/src/duckdb/src/core_functions/scalar/bit/bitstring.cpp +23 -5
  60. package/src/duckdb/src/core_functions/scalar/date/date_diff.cpp +12 -6
  61. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +1 -1
  62. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +4 -3
  63. package/src/duckdb/src/execution/expression_executor/execute_case.cpp +4 -3
  64. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +2 -1
  65. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +3 -2
  66. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -1
  67. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +2 -1
  68. package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +3 -2
  69. package/src/duckdb/src/execution/expression_executor/execute_reference.cpp +1 -1
  70. package/src/duckdb/src/execution/expression_executor.cpp +9 -3
  71. package/src/duckdb/src/execution/expression_executor_state.cpp +11 -9
  72. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +238 -0
  73. package/src/duckdb/src/execution/index/art/plan_art.cpp +94 -0
  74. package/src/duckdb/src/execution/index/index_type_set.cpp +4 -1
  75. package/src/duckdb/src/execution/join_hashtable.cpp +7 -8
  76. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +6 -4
  77. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +4 -4
  78. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +1 -1
  79. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +44 -5
  80. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +28 -24
  81. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +25 -26
  82. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +5 -3
  83. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +4 -4
  84. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +2 -2
  85. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
  86. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +1 -1
  87. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine.cpp +1 -1
  88. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +2 -2
  89. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +1 -1
  90. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +1 -1
  91. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +73 -27
  92. package/src/duckdb/src/execution/operator/helper/physical_buffered_collector.cpp +1 -1
  93. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +695 -0
  94. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1487 -0
  95. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +72 -0
  96. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
  97. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +280 -0
  98. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +666 -0
  99. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +14 -4
  100. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +207 -0
  101. package/src/duckdb/src/execution/partitionable_hashtable.cpp +207 -0
  102. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +6 -1
  103. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +0 -4
  104. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +14 -87
  105. package/src/duckdb/src/execution/physical_plan/plan_export.cpp +1 -1
  106. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +1 -1
  107. package/src/duckdb/src/execution/reservoir_sample.cpp +1 -1
  108. package/src/duckdb/src/execution/window_executor.cpp +3 -3
  109. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
  110. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -2
  111. package/src/duckdb/src/function/scalar/string/concat.cpp +118 -151
  112. package/src/duckdb/src/function/table/arrow.cpp +13 -0
  113. package/src/duckdb/src/function/table/arrow_conversion.cpp +12 -7
  114. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  115. package/src/duckdb/src/function/table/read_csv.cpp +2 -30
  116. package/src/duckdb/src/function/table/sniff_csv.cpp +2 -1
  117. package/src/duckdb/src/function/table/system/duckdb_secrets.cpp +15 -7
  118. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  119. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
  120. package/src/duckdb/src/include/duckdb/common/atomic.hpp +13 -1
  121. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +3 -4
  122. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  123. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +2 -0
  124. package/src/duckdb/src/include/duckdb/common/exception.hpp +10 -0
  125. package/src/duckdb/src/include/duckdb/common/extra_type_info/enum_type_info.hpp +53 -0
  126. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +5 -5
  127. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +5 -0
  128. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +36 -33
  129. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +10 -13
  130. package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +1 -1
  131. package/src/duckdb/src/include/duckdb/common/types/vector_cache.hpp +7 -5
  132. package/src/duckdb/src/include/duckdb/common/windows_undefs.hpp +2 -1
  133. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +2 -0
  134. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +1 -1
  135. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +0 -6
  136. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +1 -1
  137. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +3 -2
  138. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -0
  139. package/src/duckdb/src/include/duckdb/execution/index/index_type.hpp +16 -1
  140. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +4 -4
  141. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +4 -2
  142. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +3 -2
  143. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +91 -36
  144. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/sniff_result.hpp +36 -0
  145. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +1 -1
  146. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +0 -1
  147. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_export.hpp +2 -5
  148. package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
  149. package/src/duckdb/src/include/duckdb/main/database.hpp +5 -0
  150. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  151. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +20 -22
  152. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -9
  153. package/src/duckdb/src/include/duckdb/main/secret/secret.hpp +8 -1
  154. package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -0
  155. package/src/duckdb/src/include/duckdb/optimizer/unnest_rewriter.hpp +5 -5
  156. package/src/duckdb/src/include/duckdb/parser/parsed_data/exported_table_data.hpp +15 -5
  157. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -0
  158. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -0
  159. package/src/duckdb/src/include/duckdb/planner/operator/logical_export.hpp +10 -13
  160. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +1 -0
  161. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -2
  162. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +1 -1
  163. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +0 -2
  164. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +1 -0
  165. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +5 -1
  166. package/src/duckdb/src/include/duckdb.h +2 -2
  167. package/src/duckdb/src/main/appender.cpp +3 -0
  168. package/src/duckdb/src/main/capi/profiling_info-c.cpp +5 -2
  169. package/src/duckdb/src/main/client_context.cpp +8 -2
  170. package/src/duckdb/src/main/connection.cpp +1 -1
  171. package/src/duckdb/src/main/database.cpp +13 -0
  172. package/src/duckdb/src/main/extension/extension_helper.cpp +1 -1
  173. package/src/duckdb/src/main/extension/extension_install.cpp +9 -1
  174. package/src/duckdb/src/main/extension/extension_load.cpp +3 -2
  175. package/src/duckdb/src/main/extension_install_info.cpp +1 -1
  176. package/src/duckdb/src/main/profiling_info.cpp +78 -58
  177. package/src/duckdb/src/main/query_profiler.cpp +79 -89
  178. package/src/duckdb/src/main/relation/read_csv_relation.cpp +1 -1
  179. package/src/duckdb/src/main/secret/secret.cpp +2 -1
  180. package/src/duckdb/src/main/secret/secret_manager.cpp +14 -0
  181. package/src/duckdb/src/optimizer/cte_filter_pusher.cpp +4 -2
  182. package/src/duckdb/src/optimizer/deliminator.cpp +0 -7
  183. package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +7 -0
  184. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -1
  185. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +21 -21
  186. package/src/duckdb/src/parallel/task_scheduler.cpp +9 -0
  187. package/src/duckdb/src/parser/parsed_data/exported_table_data.cpp +22 -0
  188. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +3 -0
  189. package/src/duckdb/src/parser/statement/insert_statement.cpp +7 -1
  190. package/src/duckdb/src/parser/transform/expression/transform_boolean_test.cpp +1 -1
  191. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +89 -87
  192. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -2
  193. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +4 -9
  194. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +4 -0
  195. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +2 -2
  196. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +4 -1
  197. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +4 -3
  198. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +13 -3
  199. package/src/duckdb/src/planner/expression_binder.cpp +1 -1
  200. package/src/duckdb/src/planner/operator/logical_export.cpp +28 -0
  201. package/src/duckdb/src/planner/table_binding.cpp +1 -2
  202. package/src/duckdb/src/planner/table_filter.cpp +6 -2
  203. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +2 -1
  204. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
  205. package/src/duckdb/src/storage/compression/bitpacking.cpp +7 -3
  206. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
  207. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +2 -2
  208. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +16 -0
  209. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +29 -0
  210. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +15 -0
  211. package/src/duckdb/src/storage/single_file_block_manager.cpp +2 -1
  212. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +3 -5
  213. package/src/duckdb/src/storage/storage_info.cpp +4 -4
  214. package/src/duckdb/src/storage/table/row_group_collection.cpp +1 -1
  215. package/src/duckdb/src/storage/table/row_version_manager.cpp +5 -1
  216. package/src/duckdb/src/storage/temporary_file_manager.cpp +1 -1
  217. package/src/duckdb/src/transaction/duck_transaction.cpp +15 -14
  218. package/src/duckdb/third_party/brotli/common/brotli_platform.h +1 -1
  219. package/src/duckdb/third_party/brotli/dec/decode.cpp +1 -1
  220. package/src/duckdb/third_party/brotli/enc/memory.cpp +4 -4
  221. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -1
  222. package/src/duckdb/third_party/hyperloglog/sds.cpp +1 -1
  223. package/src/duckdb/third_party/hyperloglog/sds.hpp +1 -1
  224. package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -1
  225. package/src/duckdb/third_party/libpg_query/include/datatype/timestamp.hpp +1 -1
  226. package/src/duckdb/third_party/libpg_query/include/mb/pg_wchar.hpp +1 -1
  227. package/src/duckdb/third_party/libpg_query/include/nodes/bitmapset.hpp +1 -1
  228. package/src/duckdb/third_party/libpg_query/include/nodes/lockoptions.hpp +1 -1
  229. package/src/duckdb/third_party/libpg_query/include/nodes/makefuncs.hpp +1 -1
  230. package/src/duckdb/third_party/libpg_query/include/nodes/pg_list.hpp +1 -1
  231. package/src/duckdb/third_party/libpg_query/include/nodes/value.hpp +1 -1
  232. package/src/duckdb/third_party/libpg_query/include/parser/gramparse.hpp +1 -1
  233. package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -1
  234. package/src/duckdb/third_party/libpg_query/include/parser/scanner.hpp +1 -1
  235. package/src/duckdb/third_party/libpg_query/include/parser/scansup.hpp +1 -1
  236. package/src/duckdb/third_party/libpg_query/include/pg_functions.hpp +1 -1
  237. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +1 -1
  238. package/src/duckdb/third_party/libpg_query/src_backend_nodes_list.cpp +1 -1
  239. package/src/duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.cpp +1 -1
  240. package/src/duckdb/third_party/libpg_query/src_backend_nodes_value.cpp +1 -1
  241. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1964 -1964
  242. package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +1 -1
  243. package/src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp +1 -1
  244. package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +1 -1
  245. package/src/duckdb/third_party/lz4/lz4.cpp +1 -1
  246. package/src/duckdb/third_party/mbedtls/include/des_alt.h +1 -1
  247. package/src/duckdb/third_party/mbedtls/include/mbedtls/aes_alt.h +1 -1
  248. package/src/duckdb/third_party/mbedtls/include/mbedtls/aria_alt.h +1 -1
  249. package/src/duckdb/third_party/mbedtls/include/mbedtls/asn1write.h +1 -1
  250. package/src/duckdb/third_party/mbedtls/include/mbedtls/camellia_alt.h +1 -1
  251. package/src/duckdb/third_party/mbedtls/include/mbedtls/ccm_alt.h +1 -1
  252. package/src/duckdb/third_party/mbedtls/include/mbedtls/chacha20.h +1 -1
  253. package/src/duckdb/third_party/mbedtls/include/mbedtls/chachapoly.h +1 -1
  254. package/src/duckdb/third_party/mbedtls/include/mbedtls/cmac.h +1 -1
  255. package/src/duckdb/third_party/mbedtls/include/mbedtls/config_psa.h +1 -1
  256. package/src/duckdb/third_party/mbedtls/include/mbedtls/ecdsa.h +1 -1
  257. package/src/duckdb/third_party/mbedtls/include/mbedtls/ecp.h +1 -1
  258. package/src/duckdb/third_party/mbedtls/include/mbedtls/gcm_alt.h +1 -1
  259. package/src/duckdb/third_party/mbedtls/include/mbedtls/md5.h +1 -1
  260. package/src/duckdb/third_party/mbedtls/include/mbedtls/nist_kw.h +1 -1
  261. package/src/duckdb/third_party/mbedtls/include/mbedtls/pkcs12.h +1 -1
  262. package/src/duckdb/third_party/mbedtls/include/mbedtls/pkcs5.h +1 -1
  263. package/src/duckdb/third_party/mbedtls/include/mbedtls/psa_util.h +1 -1
  264. package/src/duckdb/third_party/mbedtls/include/mbedtls/ripemd160.h +1 -1
  265. package/src/duckdb/third_party/mbedtls/include/mbedtls/threading.h +1 -1
  266. package/src/duckdb/third_party/mbedtls/include/mbedtls/timing.h +1 -1
  267. package/src/duckdb/third_party/mbedtls/include/platform_alt.h +1 -1
  268. package/src/duckdb/third_party/mbedtls/include/psa/crypto.h +1 -1
  269. package/src/duckdb/third_party/mbedtls/include/rsa_alt.h +1 -1
  270. package/src/duckdb/third_party/mbedtls/include/sha1_alt.h +1 -1
  271. package/src/duckdb/third_party/mbedtls/include/sha256_alt.h +1 -1
  272. package/src/duckdb/third_party/mbedtls/include/sha512_alt.h +1 -1
  273. package/src/duckdb/third_party/mbedtls/include/ssl_misc.h +1 -1
  274. package/src/duckdb/third_party/mbedtls/library/aesni.h +1 -1
  275. package/src/duckdb/third_party/mbedtls/library/padlock.h +1 -1
  276. package/src/duckdb/third_party/miniz/miniz.cpp +1 -1
  277. package/src/duckdb/third_party/parquet/parquet_types.cpp +1 -1
  278. package/src/duckdb/third_party/parquet/windows_compatibility.h +1 -1
  279. package/src/duckdb/third_party/pcg/pcg_extras.hpp +1 -1
  280. package/src/duckdb/third_party/pcg/pcg_uint128.hpp +1 -1
  281. package/src/duckdb/third_party/skiplist/Node.h +4 -4
  282. package/src/duckdb/third_party/snappy/snappy.cc +1 -1
  283. package/src/duckdb/third_party/snappy/snappy_version.hpp +1 -1
  284. package/src/duckdb/third_party/thrift/thrift/thrift-config.h +1 -1
  285. package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1 -1
  286. package/src/duckdb/third_party/zstd/include/zstd_static.h +1 -1
  287. package/src/duckdb/ub_src_execution_index_art.cpp +2 -0
  288. package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
  289. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
@@ -19,6 +19,12 @@ using std::stringstream;
19
19
 
20
20
  void ReorderTableEntries(catalog_entry_vector_t &tables);
21
21
 
22
+ PhysicalExport::PhysicalExport(vector<LogicalType> types, CopyFunction function, unique_ptr<CopyInfo> info,
23
+ idx_t estimated_cardinality, unique_ptr<BoundExportData> exported_tables)
24
+ : PhysicalOperator(PhysicalOperatorType::EXPORT, std::move(types), estimated_cardinality),
25
+ function(std::move(function)), info(std::move(info)), exported_tables(std::move(exported_tables)) {
26
+ }
27
+
22
28
  static void WriteCatalogEntries(stringstream &ss, catalog_entry_vector_t &entries) {
23
29
  for (auto &entry : entries) {
24
30
  if (entry.get().internal) {
@@ -121,6 +127,10 @@ void PhysicalExport::ExtractEntries(ClientContext &context, vector<reference<Sch
121
127
  ExportEntries &result) {
122
128
  for (auto &schema_p : schema_list) {
123
129
  auto &schema = schema_p.get();
130
+ auto &catalog = schema.ParentCatalog();
131
+ if (catalog.IsSystemCatalog() || catalog.IsTemporaryCatalog()) {
132
+ continue;
133
+ }
124
134
  if (!schema.internal) {
125
135
  result.schemas.push_back(schema);
126
136
  }
@@ -225,8 +235,8 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c
225
235
 
226
236
  // consider the order of tables because of foreign key constraint
227
237
  entries.tables.clear();
228
- for (idx_t i = 0; i < exported_tables.data.size(); i++) {
229
- entries.tables.push_back(exported_tables.data[i].entry);
238
+ for (idx_t i = 0; i < exported_tables->data.size(); i++) {
239
+ entries.tables.push_back(exported_tables->data[i].entry);
230
240
  }
231
241
 
232
242
  // order macro's by timestamp so nested macro's are imported nicely
@@ -252,8 +262,8 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c
252
262
  // write the load.sql file
253
263
  // for every table, we write COPY INTO statement with the specified options
254
264
  stringstream load_ss;
255
- for (idx_t i = 0; i < exported_tables.data.size(); i++) {
256
- auto exported_table_info = exported_tables.data[i].table_data;
265
+ for (idx_t i = 0; i < exported_tables->data.size(); i++) {
266
+ auto exported_table_info = exported_tables->data[i].table_data;
257
267
  WriteCopyStatement(fs, load_ss, *info, exported_table_info, function);
258
268
  }
259
269
  WriteStringStreamToFile(fs, load_ss, fs.JoinPath(info->file_path, "load.sql"));
@@ -0,0 +1,207 @@
1
+ #include "duckdb/execution/operator/schema/physical_create_index.hpp"
2
+
3
+ #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
4
+ #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
5
+ #include "duckdb/catalog/catalog_entry/duck_index_entry.hpp"
6
+ #include "duckdb/main/client_context.hpp"
7
+ #include "duckdb/storage/index.hpp"
8
+ #include "duckdb/storage/storage_manager.hpp"
9
+ #include "duckdb/storage/table/append_state.hpp"
10
+ #include "duckdb/main/database_manager.hpp"
11
+ #include "duckdb/execution/index/art/art_key.hpp"
12
+ #include "duckdb/execution/index/art/node.hpp"
13
+ #include "duckdb/execution/index/art/leaf.hpp"
14
+
15
+ namespace duckdb {
16
+
17
+ PhysicalCreateIndex::PhysicalCreateIndex(LogicalOperator &op, TableCatalogEntry &table_p,
18
+ const vector<column_t> &column_ids, unique_ptr<CreateIndexInfo> info,
19
+ vector<unique_ptr<Expression>> unbound_expressions,
20
+ idx_t estimated_cardinality, const bool sorted)
21
+ : PhysicalOperator(PhysicalOperatorType::CREATE_INDEX, op.types, estimated_cardinality),
22
+ table(table_p.Cast<DuckTableEntry>()), info(std::move(info)), unbound_expressions(std::move(unbound_expressions)),
23
+ sorted(sorted) {
24
+ // convert virtual column ids to storage column ids
25
+ for (auto &column_id : column_ids) {
26
+ storage_ids.push_back(table.GetColumns().LogicalToPhysical(LogicalIndex(column_id)).index);
27
+ }
28
+ }
29
+
30
+ //===--------------------------------------------------------------------===//
31
+ // Sink
32
+ //===--------------------------------------------------------------------===//
33
+
34
+ class CreateIndexGlobalSinkState : public GlobalSinkState {
35
+ public:
36
+ //! Global index to be added to the table
37
+ unique_ptr<Index> global_index;
38
+ };
39
+
40
+ class CreateIndexLocalSinkState : public LocalSinkState {
41
+ public:
42
+ explicit CreateIndexLocalSinkState(ClientContext &context) : arena_allocator(Allocator::Get(context)) {};
43
+
44
+ unique_ptr<Index> local_index;
45
+ ArenaAllocator arena_allocator;
46
+ vector<ARTKey> keys;
47
+ DataChunk key_chunk;
48
+ vector<column_t> key_column_ids;
49
+ };
50
+
51
+ unique_ptr<GlobalSinkState> PhysicalCreateIndex::GetGlobalSinkState(ClientContext &context) const {
52
+ auto state = make_uniq<CreateIndexGlobalSinkState>();
53
+
54
+ // create the global index
55
+ switch (info->index_type) {
56
+ case IndexType::ART: {
57
+ auto &storage = table.GetStorage();
58
+ state->global_index = make_uniq<ART>(storage_ids, TableIOManager::Get(storage), unbound_expressions,
59
+ info->constraint_type, storage.db);
60
+ break;
61
+ }
62
+ default:
63
+ throw InternalException("Unimplemented index type");
64
+ }
65
+ return (std::move(state));
66
+ }
67
+
68
+ unique_ptr<LocalSinkState> PhysicalCreateIndex::GetLocalSinkState(ExecutionContext &context) const {
69
+ auto state = make_uniq<CreateIndexLocalSinkState>(context.client);
70
+
71
+ // create the local index
72
+ switch (info->index_type) {
73
+ case IndexType::ART: {
74
+ auto &storage = table.GetStorage();
75
+ state->local_index = make_uniq<ART>(storage_ids, TableIOManager::Get(storage), unbound_expressions,
76
+ info->constraint_type, storage.db);
77
+ break;
78
+ }
79
+ default:
80
+ throw InternalException("Unimplemented index type");
81
+ }
82
+ state->keys = vector<ARTKey>(STANDARD_VECTOR_SIZE);
83
+ state->key_chunk.Initialize(Allocator::Get(context.client), state->local_index->logical_types);
84
+
85
+ for (idx_t i = 0; i < state->key_chunk.ColumnCount(); i++) {
86
+ state->key_column_ids.push_back(i);
87
+ }
88
+ return std::move(state);
89
+ }
90
+
91
+ SinkResultType PhysicalCreateIndex::SinkUnsorted(Vector &row_identifiers, OperatorSinkInput &input) const {
92
+
93
+ auto &l_state = input.local_state.Cast<CreateIndexLocalSinkState>();
94
+ auto count = l_state.key_chunk.size();
95
+
96
+ // get the corresponding row IDs
97
+ row_identifiers.Flatten(count);
98
+ auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
99
+
100
+ // insert the row IDs
101
+ auto &art = l_state.local_index->Cast<ART>();
102
+ for (idx_t i = 0; i < count; i++) {
103
+ if (!art.Insert(*art.tree, l_state.keys[i], 0, row_ids[i])) {
104
+ throw ConstraintException("Data contains duplicates on indexed column(s)");
105
+ }
106
+ }
107
+
108
+ return SinkResultType::NEED_MORE_INPUT;
109
+ }
110
+
111
+ SinkResultType PhysicalCreateIndex::SinkSorted(Vector &row_identifiers, OperatorSinkInput &input) const {
112
+
113
+ auto &l_state = input.local_state.Cast<CreateIndexLocalSinkState>();
114
+ auto &storage = table.GetStorage();
115
+ auto &l_index = l_state.local_index;
116
+
117
+ // create an ART from the chunk
118
+ auto art = make_uniq<ART>(l_index->column_ids, l_index->table_io_manager, l_index->unbound_expressions,
119
+ l_index->constraint_type, storage.db, l_index->Cast<ART>().allocators);
120
+ if (!art->ConstructFromSorted(l_state.key_chunk.size(), l_state.keys, row_identifiers)) {
121
+ throw ConstraintException("Data contains duplicates on indexed column(s)");
122
+ }
123
+
124
+ // merge into the local ART
125
+ if (!l_index->MergeIndexes(*art)) {
126
+ throw ConstraintException("Data contains duplicates on indexed column(s)");
127
+ }
128
+
129
+ return SinkResultType::NEED_MORE_INPUT;
130
+ }
131
+
132
+ SinkResultType PhysicalCreateIndex::Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const {
133
+
134
+ D_ASSERT(chunk.ColumnCount() >= 2);
135
+
136
+ // generate the keys for the given input
137
+ auto &l_state = input.local_state.Cast<CreateIndexLocalSinkState>();
138
+ l_state.key_chunk.ReferenceColumns(chunk, l_state.key_column_ids);
139
+ l_state.arena_allocator.Reset();
140
+ ART::GenerateKeys(l_state.arena_allocator, l_state.key_chunk, l_state.keys);
141
+
142
+ // insert the keys and their corresponding row IDs
143
+ auto &row_identifiers = chunk.data[chunk.ColumnCount() - 1];
144
+ if (sorted) {
145
+ return SinkSorted(row_identifiers, input);
146
+ }
147
+ return SinkUnsorted(row_identifiers, input);
148
+ }
149
+
150
+ SinkCombineResultType PhysicalCreateIndex::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
151
+
152
+ auto &gstate = input.global_state.Cast<CreateIndexGlobalSinkState>();
153
+ auto &lstate = input.local_state.Cast<CreateIndexLocalSinkState>();
154
+
155
+ // merge the local index into the global index
156
+ if (!gstate.global_index->MergeIndexes(*lstate.local_index)) {
157
+ throw ConstraintException("Data contains duplicates on indexed column(s)");
158
+ }
159
+
160
+ return SinkCombineResultType::FINISHED;
161
+ }
162
+
163
+ SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
164
+ OperatorSinkFinalizeInput &input) const {
165
+
166
+ // here, we set the resulting global index as the newly created index of the table
167
+ auto &state = input.global_state.Cast<CreateIndexGlobalSinkState>();
168
+
169
+ // vacuum excess memory and verify
170
+ state.global_index->Vacuum();
171
+ D_ASSERT(!state.global_index->VerifyAndToString(true).empty());
172
+
173
+ auto &storage = table.GetStorage();
174
+ if (!storage.IsRoot()) {
175
+ throw TransactionException("Transaction conflict: cannot add an index to a table that has been altered!");
176
+ }
177
+
178
+ auto &schema = table.schema;
179
+ auto index_entry = schema.CreateIndex(context, *info, table).get();
180
+ if (!index_entry) {
181
+ D_ASSERT(info->on_conflict == OnCreateConflict::IGNORE_ON_CONFLICT);
182
+ // index already exists, but error ignored because of IF NOT EXISTS
183
+ return SinkFinalizeType::READY;
184
+ }
185
+ auto &index = index_entry->Cast<DuckIndexEntry>();
186
+
187
+ index.index = state.global_index.get();
188
+ index.info = storage.info;
189
+ for (auto &parsed_expr : info->parsed_expressions) {
190
+ index.parsed_expressions.push_back(parsed_expr->Copy());
191
+ }
192
+
193
+ // add index to storage
194
+ storage.info->indexes.AddIndex(std::move(state.global_index));
195
+ return SinkFinalizeType::READY;
196
+ }
197
+
198
+ //===--------------------------------------------------------------------===//
199
+ // Source
200
+ //===--------------------------------------------------------------------===//
201
+
202
+ SourceResultType PhysicalCreateIndex::GetData(ExecutionContext &context, DataChunk &chunk,
203
+ OperatorSourceInput &input) const {
204
+ return SourceResultType::FINISHED;
205
+ }
206
+
207
+ } // namespace duckdb
@@ -0,0 +1,207 @@
1
+ #include "duckdb/execution/partitionable_hashtable.hpp"
2
+
3
+ #include "duckdb/common/radix_partitioning.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ RadixPartitionInfo::RadixPartitionInfo(const idx_t n_partitions_upper_bound)
8
+ : n_partitions(PreviousPowerOfTwo(n_partitions_upper_bound)),
9
+ radix_bits(RadixPartitioning::RadixBits(n_partitions)), radix_mask(RadixPartitioning::Mask(radix_bits)),
10
+ radix_shift(RadixPartitioning::Shift(radix_bits)) {
11
+
12
+ D_ASSERT(radix_bits <= RadixPartitioning::MAX_RADIX_BITS);
13
+ D_ASSERT(n_partitions > 0);
14
+ D_ASSERT(n_partitions == RadixPartitioning::NumberOfPartitions(radix_bits));
15
+ D_ASSERT(IsPowerOfTwo(n_partitions));
16
+ }
17
+
18
+ PartitionableHashTable::PartitionableHashTable(ClientContext &context, Allocator &allocator,
19
+ RadixPartitionInfo &partition_info_p, vector<LogicalType> group_types_p,
20
+ vector<LogicalType> payload_types_p,
21
+ vector<BoundAggregateExpression *> bindings_p)
22
+ : context(context), allocator(allocator), group_types(std::move(group_types_p)),
23
+ payload_types(std::move(payload_types_p)), bindings(std::move(bindings_p)), is_partitioned(false),
24
+ partition_info(partition_info_p), hashes(LogicalType::HASH), hashes_subset(LogicalType::HASH) {
25
+
26
+ sel_vectors.resize(partition_info.n_partitions);
27
+ sel_vector_sizes.resize(partition_info.n_partitions);
28
+ group_subset.Initialize(allocator, group_types);
29
+ if (!payload_types.empty()) {
30
+ payload_subset.Initialize(allocator, payload_types);
31
+ }
32
+
33
+ for (hash_t r = 0; r < partition_info.n_partitions; r++) {
34
+ sel_vectors[r].Initialize();
35
+ }
36
+
37
+ RowLayout layout;
38
+ layout.Initialize(group_types, AggregateObject::CreateAggregateObjects(bindings));
39
+ tuple_size = layout.GetRowWidth();
40
+ }
41
+
42
+ HtEntryType PartitionableHashTable::GetHTEntrySize() {
43
+ // we need at least STANDARD_VECTOR_SIZE entries to fit in the hash table
44
+ if (GroupedAggregateHashTable::GetMaxCapacity(HtEntryType::HT_WIDTH_32, tuple_size) < STANDARD_VECTOR_SIZE) {
45
+ return HtEntryType::HT_WIDTH_64;
46
+ }
47
+ return HtEntryType::HT_WIDTH_32;
48
+ }
49
+
50
+ bool OverMemoryLimit(ClientContext &context, const bool is_partitioned, const RadixPartitionInfo &partition_info,
51
+ const GroupedAggregateHashTable &ht) {
52
+ const auto n_partitions = is_partitioned ? partition_info.n_partitions : 1;
53
+ const auto max_memory = BufferManager::GetBufferManager(context).GetMaxMemory();
54
+ const auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads();
55
+ const auto memory_per_partition = 0.6 * max_memory / num_threads / n_partitions;
56
+ return ht.TotalSize() > memory_per_partition;
57
+ }
58
+
59
+ idx_t PartitionableHashTable::ListAddChunk(HashTableList &list, DataChunk &groups, Vector &group_hashes,
60
+ DataChunk &payload, const unsafe_vector<idx_t> &filter) {
61
+ // If this is false, a single AddChunk would overflow the max capacity
62
+ D_ASSERT(list.empty() || groups.size() <= list.back()->MaxCapacity());
63
+ if (list.empty() || list.back()->Count() + groups.size() >= list.back()->MaxCapacity() ||
64
+ OverMemoryLimit(context, is_partitioned, partition_info, *list.back())) {
65
+ idx_t new_capacity = GroupedAggregateHashTable::InitialCapacity();
66
+ if (!list.empty()) {
67
+ new_capacity = list.back()->Capacity();
68
+ // early release first part of ht and prevent adding of more data
69
+ list.back()->Finalize();
70
+ }
71
+ list.push_back(make_uniq<GroupedAggregateHashTable>(context, allocator, group_types, payload_types, bindings,
72
+ GetHTEntrySize(), new_capacity));
73
+ }
74
+ return list.back()->AddChunk(append_state, groups, group_hashes, payload, filter);
75
+ }
76
+
77
+ idx_t PartitionableHashTable::AddChunk(DataChunk &groups, DataChunk &payload, bool do_partition,
78
+ const unsafe_vector<idx_t> &filter) {
79
+ groups.Hash(hashes);
80
+
81
+ // we partition when we are asked to or when the unpartitioned ht runs out of space
82
+ if (!IsPartitioned() && do_partition) {
83
+ Partition(false);
84
+ }
85
+
86
+ if (!IsPartitioned()) {
87
+ return ListAddChunk(unpartitioned_hts, groups, hashes, payload, filter);
88
+ }
89
+
90
+ // makes no sense to do this with 1 partition
91
+ D_ASSERT(partition_info.n_partitions > 0);
92
+
93
+ for (hash_t r = 0; r < partition_info.n_partitions; r++) {
94
+ sel_vector_sizes[r] = 0;
95
+ }
96
+
97
+ hashes.Flatten(groups.size());
98
+ auto hashes_ptr = FlatVector::GetData<hash_t>(hashes);
99
+
100
+ // Determine for every partition how much data will be sinked into it
101
+ for (idx_t i = 0; i < groups.size(); i++) {
102
+ auto partition = partition_info.GetHashPartition(hashes_ptr[i]);
103
+ D_ASSERT(partition < partition_info.n_partitions);
104
+ sel_vectors[partition].set_index(sel_vector_sizes[partition]++, i);
105
+ }
106
+
107
+ #ifdef DEBUG
108
+ // make sure we have lost no rows
109
+ idx_t total_count = 0;
110
+ for (idx_t r = 0; r < partition_info.n_partitions; r++) {
111
+ total_count += sel_vector_sizes[r];
112
+ }
113
+ D_ASSERT(total_count == groups.size());
114
+ #endif
115
+ idx_t group_count = 0;
116
+ for (hash_t r = 0; r < partition_info.n_partitions; r++) {
117
+ group_subset.Slice(groups, sel_vectors[r], sel_vector_sizes[r]);
118
+ if (!payload_types.empty()) {
119
+ payload_subset.Slice(payload, sel_vectors[r], sel_vector_sizes[r]);
120
+ } else {
121
+ payload_subset.SetCardinality(sel_vector_sizes[r]);
122
+ }
123
+ hashes_subset.Slice(hashes, sel_vectors[r], sel_vector_sizes[r]);
124
+
125
+ group_count += ListAddChunk(radix_partitioned_hts[r], group_subset, hashes_subset, payload_subset, filter);
126
+ }
127
+ return group_count;
128
+ }
129
+
130
+ void PartitionableHashTable::Partition(bool sink_done) {
131
+ D_ASSERT(!IsPartitioned());
132
+ D_ASSERT(radix_partitioned_hts.empty());
133
+ D_ASSERT(partition_info.n_partitions > 1);
134
+
135
+ vector<GroupedAggregateHashTable *> partition_hts(partition_info.n_partitions);
136
+ radix_partitioned_hts.resize(partition_info.n_partitions);
137
+ for (auto &unpartitioned_ht : unpartitioned_hts) {
138
+ for (idx_t r = 0; r < partition_info.n_partitions; r++) {
139
+ radix_partitioned_hts[r].push_back(make_uniq<GroupedAggregateHashTable>(
140
+ context, allocator, group_types, payload_types, bindings, GetHTEntrySize()));
141
+ partition_hts[r] = radix_partitioned_hts[r].back().get();
142
+ }
143
+ unpartitioned_ht->Partition(partition_hts, partition_info.radix_bits, sink_done);
144
+ unpartitioned_ht.reset();
145
+ }
146
+ unpartitioned_hts.clear();
147
+ is_partitioned = true;
148
+ }
149
+
150
+ bool PartitionableHashTable::IsPartitioned() {
151
+ return is_partitioned;
152
+ }
153
+
154
+ HashTableList PartitionableHashTable::GetPartition(idx_t partition) {
155
+ D_ASSERT(IsPartitioned());
156
+ D_ASSERT(partition < partition_info.n_partitions);
157
+ D_ASSERT(radix_partitioned_hts.size() > partition);
158
+ return std::move(radix_partitioned_hts[partition]);
159
+ }
160
+
161
+ HashTableList PartitionableHashTable::GetUnpartitioned() {
162
+ D_ASSERT(!IsPartitioned());
163
+ return std::move(unpartitioned_hts);
164
+ }
165
+
166
+ idx_t PartitionableHashTable::GetPartitionCount(idx_t partition) const {
167
+ idx_t total_size = 0;
168
+ for (const auto &ht : radix_partitioned_hts[partition]) {
169
+ total_size += ht->Count();
170
+ }
171
+ return total_size;
172
+ }
173
+
174
+ idx_t PartitionableHashTable::GetPartitionSize(idx_t partition) const {
175
+ idx_t total_size = 0;
176
+ for (const auto &ht : radix_partitioned_hts[partition]) {
177
+ total_size += ht->DataSize();
178
+ }
179
+ return total_size;
180
+ }
181
+
182
+ void PartitionableHashTable::Finalize() {
183
+ if (IsPartitioned()) {
184
+ for (auto &ht_list : radix_partitioned_hts) {
185
+ for (auto &ht : ht_list) {
186
+ D_ASSERT(ht);
187
+ ht->Finalize();
188
+ }
189
+ }
190
+ } else {
191
+ for (auto &ht : unpartitioned_hts) {
192
+ D_ASSERT(ht);
193
+ ht->Finalize();
194
+ }
195
+ }
196
+ }
197
+
198
+ void PartitionableHashTable::Append(GroupedAggregateHashTable &ht) {
199
+ if (unpartitioned_hts.empty()) {
200
+ unpartitioned_hts.push_back(make_uniq<GroupedAggregateHashTable>(context, allocator, group_types, payload_types,
201
+ bindings, GetHTEntrySize(),
202
+ GroupedAggregateHashTable::InitialCapacity()));
203
+ }
204
+ unpartitioned_hts.back()->Append(ht);
205
+ }
206
+
207
+ } // namespace duckdb
@@ -130,7 +130,12 @@ void PerfectAggregateHashTable::AddChunk(DataChunk &groups, DataChunk &payload)
130
130
  // compute the actual pointer to the data by adding it to the base HT pointer and multiplying by the tuple size
131
131
  for (idx_t i = 0; i < groups.size(); i++) {
132
132
  const auto group = address_data[i];
133
- D_ASSERT(group < total_groups);
133
+ if (group >= total_groups) {
134
+ throw InvalidInputException("Perfect hash aggregate: aggregate group %llu exceeded total groups %llu. This "
135
+ "likely means that the statistics in your data source are corrupt.\n* PRAGMA "
136
+ "disable_optimizer to disable optimizations that rely on correct statistics",
137
+ group, total_groups);
138
+ }
134
139
  group_is_set[group] = true;
135
140
  address_data[i] = uintptr_t(data) + group * tuple_size;
136
141
  }
@@ -117,10 +117,6 @@ void CheckForPerfectJoinOpt(LogicalComparisonJoin &op, PerfectHashJoinStats &joi
117
117
  if (join_state.build_range > MAX_BUILD_SIZE) {
118
118
  return;
119
119
  }
120
- if (NumericStats::Min(stats_build) <= NumericStats::Min(stats_probe) &&
121
- NumericStats::Max(stats_probe) <= NumericStats::Max(stats_build)) {
122
- join_state.is_probe_in_domain = true;
123
- }
124
120
  join_state.is_build_small = true;
125
121
  return;
126
122
  }
@@ -1,24 +1,16 @@
1
1
  #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
2
- #include "duckdb/execution/operator/projection/physical_projection.hpp"
3
2
  #include "duckdb/execution/operator/filter/physical_filter.hpp"
4
- #include "duckdb/execution/operator/scan/physical_table_scan.hpp"
5
3
  #include "duckdb/execution/operator/schema/physical_create_art_index.hpp"
6
- #include "duckdb/execution/operator/order/physical_order.hpp"
7
4
  #include "duckdb/execution/physical_plan_generator.hpp"
8
5
  #include "duckdb/planner/operator/logical_create_index.hpp"
9
- #include "duckdb/planner/operator/logical_get.hpp"
10
- #include "duckdb/planner/expression/bound_operator_expression.hpp"
11
- #include "duckdb/planner/expression/bound_reference_expression.hpp"
12
- #include "duckdb/planner/table_filter.hpp"
6
+
7
+ #include "duckdb/main/database.hpp"
8
+ #include "duckdb/execution/index/index_type.hpp"
9
+ #include "duckdb/execution/index/bound_index.hpp"
13
10
 
14
11
  namespace duckdb {
15
12
 
16
13
  unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateIndex &op) {
17
- // generate a physical plan for the parallel index creation which consists of the following operators
18
- // table scan - projection (for expression execution) - filter (NOT NULL) - order (if applicable) - create index
19
-
20
- D_ASSERT(op.children.size() == 1);
21
- auto table_scan = CreatePlan(*op.children[0]);
22
14
 
23
15
  // validate that all expressions contain valid scalar functions
24
16
  // e.g. get_current_timestamp(), random(), and sequence values are not allowed as index keys
@@ -30,12 +22,14 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
30
22
  }
31
23
  }
32
24
 
33
- // if we get here and the index type is not ART, we throw an exception
34
- // because we don't support any other index type yet. However, an operator extension could have
35
- // replaced this part of the plan with a different index creation operator.
36
- if (op.info->index_type != ART::TYPE_NAME) {
25
+ // Do we have a valid index type?
26
+ const auto index_type = context.db->config.GetIndexTypes().FindByName(op.info->index_type);
27
+ if (!index_type) {
37
28
  throw BinderException("Unknown index type: " + op.info->index_type);
38
29
  }
30
+ if (!index_type->create_plan) {
31
+ throw InternalException("Index type '%s' is missing a create_plan function", op.info->index_type);
32
+ }
39
33
 
40
34
  // table scan operator for index key columns and row IDs
41
35
  dependencies.AddDependency(op.table);
@@ -43,78 +37,11 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateInde
43
37
  D_ASSERT(op.info->scan_types.size() - 1 <= op.info->names.size());
44
38
  D_ASSERT(op.info->scan_types.size() - 1 <= op.info->column_ids.size());
45
39
 
46
- // projection to execute expressions on the key columns
47
-
48
- vector<LogicalType> new_column_types;
49
- vector<unique_ptr<Expression>> select_list;
50
- for (idx_t i = 0; i < op.expressions.size(); i++) {
51
- new_column_types.push_back(op.expressions[i]->return_type);
52
- select_list.push_back(std::move(op.expressions[i]));
53
- }
54
- new_column_types.emplace_back(LogicalType::ROW_TYPE);
55
- select_list.push_back(make_uniq<BoundReferenceExpression>(LogicalType::ROW_TYPE, op.info->scan_types.size() - 1));
56
-
57
- auto projection = make_uniq<PhysicalProjection>(new_column_types, std::move(select_list), op.estimated_cardinality);
58
- projection->children.push_back(std::move(table_scan));
59
-
60
- // filter operator for IS_NOT_NULL on each key column
61
-
62
- vector<LogicalType> filter_types;
63
- vector<unique_ptr<Expression>> filter_select_list;
64
-
65
- for (idx_t i = 0; i < new_column_types.size() - 1; i++) {
66
- filter_types.push_back(new_column_types[i]);
67
- auto is_not_null_expr =
68
- make_uniq<BoundOperatorExpression>(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN);
69
- auto bound_ref = make_uniq<BoundReferenceExpression>(new_column_types[i], i);
70
- is_not_null_expr->children.push_back(std::move(bound_ref));
71
- filter_select_list.push_back(std::move(is_not_null_expr));
72
- }
73
-
74
- auto null_filter =
75
- make_uniq<PhysicalFilter>(std::move(filter_types), std::move(filter_select_list), op.estimated_cardinality);
76
- null_filter->types.emplace_back(LogicalType::ROW_TYPE);
77
- null_filter->children.push_back(std::move(projection));
78
-
79
- // determine if we sort the data prior to index creation
80
- // we don't sort, if either VARCHAR or compound key
81
- auto perform_sorting = true;
82
- if (op.unbound_expressions.size() > 1) {
83
- perform_sorting = false;
84
- } else if (op.unbound_expressions[0]->return_type.InternalType() == PhysicalType::VARCHAR) {
85
- perform_sorting = false;
86
- }
87
-
88
- // actual physical create index operator
89
-
90
- auto physical_create_index =
91
- make_uniq<PhysicalCreateARTIndex>(op, op.table, op.info->column_ids, std::move(op.info),
92
- std::move(op.unbound_expressions), op.estimated_cardinality, perform_sorting);
93
-
94
- if (perform_sorting) {
95
-
96
- // optional order operator
97
- vector<BoundOrderByNode> orders;
98
- vector<idx_t> projections;
99
- for (idx_t i = 0; i < new_column_types.size() - 1; i++) {
100
- auto col_expr = make_uniq_base<Expression, BoundReferenceExpression>(new_column_types[i], i);
101
- orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, std::move(col_expr));
102
- projections.emplace_back(i);
103
- }
104
- projections.emplace_back(new_column_types.size() - 1);
105
-
106
- auto physical_order = make_uniq<PhysicalOrder>(new_column_types, std::move(orders), std::move(projections),
107
- op.estimated_cardinality);
108
- physical_order->children.push_back(std::move(null_filter));
109
-
110
- physical_create_index->children.push_back(std::move(physical_order));
111
- } else {
112
-
113
- // no ordering
114
- physical_create_index->children.push_back(std::move(null_filter));
115
- }
40
+ D_ASSERT(op.children.size() == 1);
41
+ auto table_scan = CreatePlan(*op.children[0]);
116
42
 
117
- return std::move(physical_create_index);
43
+ PlanIndexInput input(context, op, table_scan);
44
+ return index_type->create_plan(input);
118
45
  }
119
46
 
120
47
  } // namespace duckdb
@@ -11,7 +11,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalExport &op
11
11
  throw PermissionException("Export is disabled through configuration");
12
12
  }
13
13
  auto export_node = make_uniq<PhysicalExport>(op.types, op.function, std::move(op.copy_info),
14
- op.estimated_cardinality, op.exported_tables);
14
+ op.estimated_cardinality, std::move(op.exported_tables));
15
15
  // plan the underlying copy statements, if any
16
16
  if (!op.children.empty()) {
17
17
  auto plan = CreatePlan(*op.children[0]);
@@ -159,7 +159,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalGet &op) {
159
159
  vector<unique_ptr<Expression>> expressions;
160
160
  for (auto &column_id : column_ids) {
161
161
  if (column_id == COLUMN_IDENTIFIER_ROW_ID) {
162
- types.emplace_back(LogicalType::BIGINT);
162
+ types.emplace_back(LogicalType::ROW_TYPE);
163
163
  expressions.push_back(make_uniq<BoundConstantExpression>(Value::BIGINT(0)));
164
164
  } else {
165
165
  auto type = op.returned_types[column_id];
@@ -79,7 +79,7 @@ unique_ptr<DataChunk> ReservoirSample::GetChunk() {
79
79
  for (idx_t i = samples_remaining; i < collected_sample_count; i++) {
80
80
  sel.set_index(i - samples_remaining, i);
81
81
  }
82
- ret->Initialize(allocator, reservoir_types.begin(), reservoir_types.end(), STANDARD_VECTOR_SIZE);
82
+ ret->Initialize(allocator, reservoir_types);
83
83
  ret->Slice(*reservoir_data_chunk, sel, STANDARD_VECTOR_SIZE);
84
84
  ret->SetCardinality(STANDARD_VECTOR_SIZE);
85
85
  // reduce capacity and cardinality of the sample data chunk
@@ -1671,23 +1671,23 @@ void WindowLeadLagExecutor::EvaluateInternal(WindowExecutorGlobalState &gstate,
1671
1671
  // else offset is zero, so don't move.
1672
1672
 
1673
1673
  if (can_shift) {
1674
+ const auto target_limit = MinValue(partition_end[i], row_end) - row_idx;
1674
1675
  if (!delta) {
1675
1676
  // Copy source[index:index+width] => result[i:]
1676
1677
  const auto index = NumericCast<idx_t>(val_idx);
1677
1678
  const auto source_limit = partition_end[i] - index;
1678
- const auto target_limit = MinValue(partition_end[i], row_end) - row_idx;
1679
1679
  const auto width = MinValue(source_limit, target_limit);
1680
1680
  auto &source = payload_collection.data[0];
1681
1681
  VectorOperations::Copy(source, result, index + width, index, i);
1682
1682
  i += width;
1683
1683
  row_idx += width;
1684
1684
  } else if (wexpr.default_expr) {
1685
- const auto width = MinValue(delta, count - i);
1685
+ const auto width = MinValue(delta, target_limit);
1686
1686
  llstate.leadlag_default.CopyCell(result, i, width);
1687
1687
  i += width;
1688
1688
  row_idx += width;
1689
1689
  } else {
1690
- for (idx_t nulls = MinValue(delta, count - i); nulls--; ++i, ++row_idx) {
1690
+ for (idx_t nulls = MinValue(delta, target_limit); nulls--; ++i, ++row_idx) {
1691
1691
  FlatVector::SetNull(result, i, true);
1692
1692
  }
1693
1693
  }