duckdb 0.8.2-dev11.0 → 0.8.2-dev1212.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (396)
  1. package/binding.gyp +14 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/lib/duckdb.d.ts +59 -0
  6. package/lib/duckdb.js +21 -0
  7. package/package.json +1 -1
  8. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  10. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  11. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  12. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  13. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  14. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  15. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  16. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  17. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  18. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  19. package/src/duckdb/extension/icu/icu_extension.cpp +5 -7
  20. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  21. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  22. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  23. package/src/duckdb/extension/json/json_common.cpp +272 -40
  24. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  26. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  27. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  28. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  29. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  30. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  31. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  32. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  33. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  34. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  35. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  36. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  37. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  38. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  39. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  40. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  41. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  42. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  43. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  44. package/src/duckdb/extension/parquet/parquet_extension.cpp +191 -19
  45. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  46. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  47. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  48. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  49. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  50. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  51. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  52. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  53. package/src/duckdb/src/common/allocator.cpp +14 -2
  54. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  55. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +0 -12
  56. package/src/duckdb/src/common/assert.cpp +3 -0
  57. package/src/duckdb/src/common/enum_util.cpp +42 -5
  58. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  60. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  61. package/src/duckdb/src/common/exception.cpp +2 -2
  62. package/src/duckdb/src/common/file_system.cpp +15 -0
  63. package/src/duckdb/src/common/local_file_system.cpp +2 -2
  64. package/src/duckdb/src/common/multi_file_reader.cpp +181 -18
  65. package/src/duckdb/src/common/radix_partitioning.cpp +27 -9
  66. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  67. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  68. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  69. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  70. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  71. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  72. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  73. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  74. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  75. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  76. package/src/duckdb/src/common/types/date.cpp +9 -0
  77. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  78. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  79. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  80. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  81. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  82. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  83. package/src/duckdb/src/common/types/vector.cpp +15 -14
  84. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +6 -4
  85. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  86. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  87. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  88. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  92. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  93. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  94. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +0 -17
  95. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  96. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  97. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  98. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  99. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  100. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  102. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  103. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  104. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  105. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  106. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  107. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  108. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  109. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  110. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  111. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  112. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  113. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  114. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  115. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  116. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +414 -283
  117. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  118. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  119. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +22 -3
  121. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -2
  122. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  123. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  124. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  125. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  126. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  127. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  128. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -2
  129. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  130. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  131. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  132. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  133. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  134. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  135. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  136. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  137. package/src/duckdb/src/execution/physical_operator.cpp +17 -14
  138. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  139. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  140. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  141. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  142. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  143. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  144. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  145. package/src/duckdb/src/function/function.cpp +3 -1
  146. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  147. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  148. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  149. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  150. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  151. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  152. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  153. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  154. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  155. package/src/duckdb/src/function/table/system/test_all_types.cpp +38 -18
  156. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  157. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  158. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  159. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  160. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  161. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  162. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  163. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  164. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  165. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  166. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  167. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  168. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  169. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  170. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  171. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  172. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  173. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  174. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  175. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  176. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  177. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  178. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  179. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  180. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  181. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  182. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  183. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  184. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  185. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  186. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  187. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  188. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  189. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  190. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +24 -6
  191. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  192. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  193. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  194. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  195. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  196. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  197. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  198. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  199. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  200. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  201. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  202. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -10
  203. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  204. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  205. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  206. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  207. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  208. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  209. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  210. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  211. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  212. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -0
  213. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  214. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  215. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  216. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  217. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  218. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  219. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  220. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  221. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  222. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  223. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +1 -1
  224. package/src/duckdb/src/include/duckdb/main/client_config.hpp +3 -0
  225. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  226. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  227. package/src/duckdb/src/include/duckdb/main/settings.hpp +21 -1
  228. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  229. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  230. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  231. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  232. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  233. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  234. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  235. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  236. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  237. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  238. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  239. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  240. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  241. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  242. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  243. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  244. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  245. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  246. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  247. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  248. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  249. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  250. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -5
  251. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  252. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  253. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  254. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  255. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  256. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  257. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  258. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  259. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  260. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  261. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  262. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  263. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  264. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  265. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  266. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  267. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  268. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  269. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  270. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  271. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  272. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  273. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  274. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  275. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  276. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  277. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  278. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  279. package/src/duckdb/src/include/duckdb.h +28 -0
  280. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  281. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  282. package/src/duckdb/src/main/config.cpp +2 -0
  283. package/src/duckdb/src/main/database.cpp +1 -1
  284. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  285. package/src/duckdb/src/main/settings/settings.cpp +40 -18
  286. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  287. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  288. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  289. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  290. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  291. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  292. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  293. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  294. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  295. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  296. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  297. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  298. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  299. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  300. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  301. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  302. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  303. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  304. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  305. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  306. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  307. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  308. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  309. package/src/duckdb/src/parallel/executor.cpp +15 -0
  310. package/src/duckdb/src/parallel/pipeline_executor.cpp +7 -6
  311. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  312. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  313. package/src/duckdb/src/parser/expression/lambda_expression.cpp +1 -1
  314. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  315. package/src/duckdb/src/parser/query_node/cte_node.cpp +75 -0
  316. package/src/duckdb/src/parser/query_node.cpp +18 -1
  317. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -0
  318. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  319. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  320. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  321. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  322. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  323. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  324. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  325. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  326. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  327. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  328. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  329. package/src/duckdb/src/parser/transformer.cpp +15 -0
  330. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  331. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  332. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  333. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  334. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  335. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  336. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  337. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -50
  338. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  339. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +61 -26
  340. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  341. package/src/duckdb/src/planner/binder.cpp +5 -0
  342. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  343. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  344. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  345. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  346. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  347. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  348. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  349. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  350. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  351. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  352. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  353. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  354. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  355. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  356. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  357. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  358. package/src/duckdb/src/storage/data_table.cpp +1 -1
  359. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  360. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  361. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  362. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  363. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  364. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  365. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  366. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  367. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  368. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  369. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  370. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  371. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  372. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  373. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  374. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  375. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  376. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  377. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  378. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  379. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  380. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  381. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  382. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  383. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  384. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  385. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  386. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  387. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  388. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  389. package/src/duckdb_node.hpp +1 -0
  390. package/src/statement.cpp +104 -4
  391. package/test/columns.test.ts +243 -0
  392. package/test/test_all_types.test.ts +233 -0
  393. package/tsconfig.json +1 -0
  394. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  395. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  396. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -5,34 +5,67 @@
5
5
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
6
6
  #include "duckdb/planner/expression/bound_operator_expression.hpp"
7
7
  #include "duckdb/planner/expression/bound_subquery_expression.hpp"
8
+ #include "duckdb/planner/query_node/bound_select_node.hpp"
8
9
  #include "duckdb/planner/expression_iterator.hpp"
10
+ #include "duckdb/planner/tableref/bound_joinref.hpp"
11
+ #include "duckdb/planner/operator/logical_dependent_join.hpp"
9
12
 
10
13
  namespace duckdb {
11
14
 
12
15
  // Constructor. The added (+) form stores the base binding, the correlated-column map, the starting
  // lateral depth and the recursive-rewrite flag in the members of the same names; the removed (-) form
  // took only the first two. The body is empty.
  RewriteCorrelatedExpressions::RewriteCorrelatedExpressions(ColumnBinding base_binding,
13
- column_binding_map_t<idx_t> &correlated_map)
14
- : base_binding(base_binding), correlated_map(correlated_map) {
16
+ column_binding_map_t<idx_t> &correlated_map,
17
+ idx_t lateral_depth, bool recursive_rewrite)
18
+ : base_binding(base_binding), correlated_map(correlated_map), lateral_depth(lateral_depth),
19
+ recursive_rewrite(recursive_rewrite) {
15
20
  }
16
21
 
17
22
  // Visits a logical operator. With recursive_rewrite set, the children are visited first (before this
  // operator's own expressions); for a dependent join, lateral_depth is raised by one only while the
  // second child is being visited. Afterwards, for a dependent join, every entry of its
  // correlated_columns that is present in correlated_map is re-pointed at base_binding (column index
  // offset by the mapped value). Finally the operator's own expressions are visited.
  void RewriteCorrelatedExpressions::VisitOperator(LogicalOperator &op) {
23
+ if (recursive_rewrite) {
24
+ // Update column bindings from left child of lateral to right child
25
+ if (op.type == LogicalOperatorType::LOGICAL_DEPENDENT_JOIN) {
26
+ D_ASSERT(op.children.size() == 2);
27
+ VisitOperator(*op.children[0]);
// the right child is visited one lateral level deeper; the depth is restored afterwards
28
+ lateral_depth++;
29
+ VisitOperator(*op.children[1]);
30
+ lateral_depth--;
31
+ } else {
32
+ VisitOperatorChildren(op);
33
+ }
34
+ }
35
+ // update the bindings in the correlated columns of the dependent join
36
+ if (op.type == LogicalOperatorType::LOGICAL_DEPENDENT_JOIN) {
37
+ auto &plan = op.Cast<LogicalDependentJoin>();
38
+ for (auto &corr : plan.correlated_columns) {
39
+ auto entry = correlated_map.find(corr.binding);
// columns that are not in the map are left untouched
40
+ if (entry != correlated_map.end()) {
41
+ corr.binding = ColumnBinding(base_binding.table_index, base_binding.column_index + entry->second);
42
+ }
43
+ }
44
+ }
18
45
  VisitOperatorExpressions(op);
19
46
  }
20
47
 
21
48
  // Rewrites one bound column reference in place and always returns nullptr (the expression node is
  // modified, never replaced). In the added (+) form, references whose depth is at most lateral_depth are
  // skipped; any other reference must have depth 1 + lateral_depth and must be present in
  // correlated_map. Its binding is redirected to base_binding (column index offset by the mapped value)
  // and its depth is either decremented (recursive_rewrite) or reset to 0.
  unique_ptr<Expression> RewriteCorrelatedExpressions::VisitReplace(BoundColumnRefExpression &expr,
22
49
  unique_ptr<Expression> *expr_ptr) {
23
- if (expr.depth == 0) {
50
+ if (expr.depth <= lateral_depth) {
51
+ // Indicates local correlations that are not relevant for the current rewrite
24
52
  return nullptr;
25
53
  }
26
54
  // correlated column reference
27
55
  // replace with the entry referring to the duplicate eliminated scan
28
- // if this assertion occurs it generally means the correlated expressions were not propagated correctly
29
- // through different binders
30
- D_ASSERT(expr.depth == 1);
56
+ // if this assertion fires it generally means the bindings are set inappropriately in the binder, or
57
+ // that we either failed to account for a lateral binder or over-counted lateral binders
58
+ D_ASSERT(expr.depth == 1 + lateral_depth);
31
59
  auto entry = correlated_map.find(expr.binding);
32
60
  D_ASSERT(entry != correlated_map.end());
33
61
 
34
62
  expr.binding = ColumnBinding(base_binding.table_index, base_binding.column_index + entry->second);
35
- expr.depth = 0;
63
+ if (recursive_rewrite) {
// NOTE(review): combined with the assertion above, depth > 1 implies lateral_depth >= 1 here - confirm
// that recursive rewrites are only started from inside a lateral join
64
+ D_ASSERT(expr.depth > 1);
65
+ expr.depth--;
66
+ } else {
67
+ expr.depth = 0;
68
+ }
36
69
  return nullptr;
37
70
  }
38
71
 
@@ -53,6 +86,21 @@ RewriteCorrelatedExpressions::RewriteCorrelatedRecursive::RewriteCorrelatedRecur
53
86
  : parent(parent), base_binding(base_binding), correlated_map(correlated_map) {
54
87
  }
55
88
 
89
// Walks a bound table reference. Only JOIN references are processed: each of the join's
// correlated_columns that is present in correlated_map is re-pointed at base_binding (column index
// offset by the mapped value), then the left and right sides are processed the same way.
// Any other reference type is left untouched and ends the recursion.
+ void RewriteCorrelatedExpressions::RewriteCorrelatedRecursive::RewriteJoinRefRecursive(BoundTableRef &ref) {
90
+ // recursively rewrite bindings in the correlated columns for the table ref and all the children
91
+ if (ref.type == TableReferenceType::JOIN) {
92
+ auto &bound_join = ref.Cast<BoundJoinRef>();
93
+ for (auto &corr : bound_join.correlated_columns) {
94
+ auto entry = correlated_map.find(corr.binding);
// columns that are not in the map keep their binding
95
+ if (entry != correlated_map.end()) {
96
+ corr.binding = ColumnBinding(base_binding.table_index, base_binding.column_index + entry->second);
97
+ }
98
+ }
99
+ RewriteJoinRefRecursive(*bound_join.left);
100
+ RewriteJoinRefRecursive(*bound_join.right);
101
+ }
102
+ }
103
+
56
104
  void RewriteCorrelatedExpressions::RewriteCorrelatedRecursive::RewriteCorrelatedSubquery(
57
105
  BoundSubqueryExpression &expr) {
58
106
  // rewrite the binding in the correlated list of the subquery)
@@ -62,6 +110,16 @@ void RewriteCorrelatedExpressions::RewriteCorrelatedRecursive::RewriteCorrelated
62
110
  corr.binding = ColumnBinding(base_binding.table_index, base_binding.column_index + entry->second);
63
111
  }
64
112
  }
113
+ // TODO: Cleanup and find a better way to do this
114
+ auto &node = *expr.subquery;
115
+ if (node.type == QueryNodeType::SELECT_NODE) {
116
+ // Found an unplanned select node, need to update column bindings correlated columns in the from tables
117
+ auto &bound_select = node.Cast<BoundSelectNode>();
118
+ if (bound_select.from_table) {
119
+ BoundTableRef &table_ref = *bound_select.from_table;
120
+ RewriteJoinRefRecursive(table_ref);
121
+ }
122
+ }
65
123
  // now rewrite any correlated BoundColumnRef expressions inside the subquery
66
124
  ExpressionIterator::EnumerateQueryNodeChildren(*expr.subquery,
67
125
  [&](Expression &child) { RewriteCorrelatedExpressions(child); });
@@ -109,7 +109,6 @@ data_ptr_t ArenaAllocator::ReallocateAligned(data_ptr_t pointer, idx_t old_size,
109
109
  }
110
110
 
111
111
  void ArenaAllocator::Reset() {
112
-
113
112
  if (head) {
114
113
  // destroy all chunks except the current one
115
114
  if (head->next) {
@@ -148,7 +147,7 @@ ArenaChunk *ArenaAllocator::GetTail() {
148
147
  return tail;
149
148
  }
150
149
 
151
// Returns true when head is null. The only change in this hunk is the added const qualifier.
- bool ArenaAllocator::IsEmpty() {
150
+ bool ArenaAllocator::IsEmpty() const {
152
151
  return head == nullptr;
153
152
  }
154
153
 
@@ -79,4 +79,7 @@ void BlockManager::UnregisterBlock(block_id_t block_id, bool can_destroy) {
79
79
  }
80
80
  }
81
81
 
82
// Base-class implementation: does nothing. SingleFileBlockManager::Truncate calls this before doing
// its own work.
+ void BlockManager::Truncate() {
83
+ }
84
+
82
85
  } // namespace duckdb
@@ -111,6 +111,9 @@ void SingleFileCheckpointWriter::CreateCheckpoint() {
111
111
  // truncate the WAL
112
112
  wal->Truncate(0);
113
113
 
114
+ // truncate the file
115
+ block_manager.Truncate();
116
+
114
117
  // mark all blocks written as part of the metadata as modified
115
118
  metadata_writer->MarkWrittenBlocks();
116
119
  table_metadata_writer->MarkWrittenBlocks();
@@ -272,7 +272,6 @@ struct RLEScanState : public SegmentScanState {
272
272
  }
273
273
 
274
274
  BufferHandle handle;
275
- uint32_t rle_offset;
276
275
  idx_t entry_pos;
277
276
  idx_t position_in_entry;
278
277
  uint32_t rle_count_offset;
@@ -56,7 +56,7 @@ DataTable::DataTable(AttachedDatabase &db, shared_ptr<TableIOManager> table_io_m
56
56
  row_groups->Verify();
57
57
  }
58
58
 
59
- DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition &new_column, Expression *default_value)
59
+ DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition &new_column, Expression &default_value)
60
60
  : info(parent.info), db(parent.db), is_root(true) {
61
61
  // add the column definitions from this DataTable
62
62
  for (auto &column_def : parent.column_definitions) {
@@ -61,11 +61,11 @@ LocalTableStorage::LocalTableStorage(DataTable &new_dt, LocalTableStorage &paren
61
61
  }
62
62
 
63
63
  // Builds the local storage for new_dt out of parent's storage with one column added: the initializer
  // list copies or moves parent's members, then the row groups are rebuilt through AddColumn, parent's
  // row_groups pointer is reset and parent's indexes are moved over. In the added (+) form
  // default_value is a reference instead of an optional pointer and is passed through as-is.
  LocalTableStorage::LocalTableStorage(ClientContext &context, DataTable &new_dt, LocalTableStorage &parent,
64
- ColumnDefinition &new_column, optional_ptr<Expression> default_value)
64
+ ColumnDefinition &new_column, Expression &default_value)
65
65
  : table_ref(new_dt), allocator(Allocator::Get(new_dt.db)), deleted_rows(parent.deleted_rows),
66
66
  optimistic_writer(new_dt, parent.optimistic_writer), optimistic_writers(std::move(parent.optimistic_writers)),
67
67
  merged_storage(parent.merged_storage) {
68
- row_groups = parent.row_groups->AddColumn(context, new_column, default_value.get());
68
+ row_groups = parent.row_groups->AddColumn(context, new_column, default_value);
69
69
  parent.row_groups.reset();
70
70
  indexes.Move(parent.indexes);
71
71
  }
@@ -508,7 +508,7 @@ void LocalStorage::MoveStorage(DataTable &old_dt, DataTable &new_dt) {
508
508
  }
509
509
 
510
510
  void LocalStorage::AddColumn(DataTable &old_dt, DataTable &new_dt, ColumnDefinition &new_column,
511
- optional_ptr<Expression> default_value) {
511
+ Expression &default_value) {
512
512
  // check if there are any pending appends for the old version of the table
513
513
  auto storage = table_manager.MoveEntry(old_dt);
514
514
  if (!storage) {
@@ -368,6 +368,29 @@ void SingleFileBlockManager::Write(FileBuffer &buffer, block_id_t block_id) {
368
368
  ChecksumAndWrite(buffer, BLOCK_START + block_id * Storage::BLOCK_ALLOC_SIZE);
369
369
  }
370
370
 
371
// Shrinks the file by dropping free blocks that sit at its very end. Walking the free list from the
// back, each entry equal to max_block - 1 lowers max_block by one; the walk stops at the first entry
// that is not the current last block. The dropped ids are then erased from free_list and the file
// handle is truncated to BLOCK_START + max_block * Storage::BLOCK_ALLOC_SIZE.
// NOTE(review): this relies on free_list iterating in ascending block-id order so that rbegin() yields
// the highest id - confirm against the container's declaration.
+ void SingleFileBlockManager::Truncate() {
372
+ BlockManager::Truncate();
373
+ idx_t blocks_to_truncate = 0;
374
+ // reverse iterate over the free-list
375
+ for (auto entry = free_list.rbegin(); entry != free_list.rend(); entry++) {
376
+ auto block_id = *entry;
// stop as soon as the entry is not the last block of the file
377
+ if (block_id + 1 != max_block) {
378
+ break;
379
+ }
380
+ blocks_to_truncate++;
381
+ max_block--;
382
+ }
383
+ if (blocks_to_truncate == 0) {
384
+ // nothing to truncate
385
+ return;
386
+ }
387
+ // truncate the file
// max_block was already lowered above, so max_block + i enumerates exactly the dropped block ids
388
+ for (idx_t i = 0; i < blocks_to_truncate; i++) {
389
+ free_list.erase(max_block + i);
390
+ }
391
+ handle->Truncate(BLOCK_START + max_block * Storage::BLOCK_ALLOC_SIZE);
392
+ }
393
+
371
394
  vector<block_id_t> SingleFileBlockManager::GetFreeListBlocks() {
372
395
  vector<block_id_t> free_list_blocks;
373
396
 
@@ -1,10 +1,11 @@
1
1
  #include "duckdb/storage/statistics/string_stats.hpp"
2
- #include "duckdb/storage/statistics/base_statistics.hpp"
2
+
3
3
  #include "duckdb/common/field_writer.hpp"
4
- #include "utf8proc_wrapper.hpp"
5
4
  #include "duckdb/common/string_util.hpp"
6
5
  #include "duckdb/common/types/vector.hpp"
7
6
  #include "duckdb/main/error_manager.hpp"
7
+ #include "duckdb/storage/statistics/base_statistics.hpp"
8
+ #include "utf8proc_wrapper.hpp"
8
9
 
9
10
  namespace duckdb {
10
11
 
@@ -67,6 +68,24 @@ bool StringStats::CanContainUnicode(const BaseStatistics &stats) {
67
68
  return StringStats::GetDataUnsafe(stats).has_unicode;
68
69
  }
69
70
 
71
// Converts a min/max buffer into a string. The length is the index of the first zero byte, capped at
// StringStatsData::MAX_STRING_MINMAX_SIZE when no zero byte is found within that many bytes.
+ string GetStringMinMaxValue(const data_t data[]) {
72
+ idx_t len;
73
+ for (len = 0; len < StringStatsData::MAX_STRING_MINMAX_SIZE; len++) {
74
+ if (!data[len]) {
75
+ break;
76
+ }
77
+ }
// an explicit length is passed, so the buffer does not need to be zero-terminated
78
+ return string(const_char_ptr_cast(data), len);
79
+ }
80
+
81
// Returns the stored min buffer of the string statistics as a string (see GetStringMinMaxValue for how
// the length is determined).
+ string StringStats::Min(const BaseStatistics &stats) {
82
+ return GetStringMinMaxValue(StringStats::GetDataUnsafe(stats).min);
83
+ }
84
+
85
// Returns the stored max buffer of the string statistics as a string (see GetStringMinMaxValue for how
// the length is determined).
+ string StringStats::Max(const BaseStatistics &stats) {
86
+ return GetStringMinMaxValue(StringStats::GetDataUnsafe(stats).max);
87
+ }
88
+
70
89
  void StringStats::ResetMaxStringLength(BaseStatistics &stats) {
71
90
  StringStats::GetDataUnsafe(stats).has_max_string_length = false;
72
91
  }
@@ -2,7 +2,7 @@
2
2
 
3
3
  namespace duckdb {
4
4
 
5
- const uint64_t VERSION_NUMBER = 51;
5
+ const uint64_t VERSION_NUMBER = 52;
6
6
 
7
7
  struct StorageVersionInfo {
8
8
  const char *version_name;
@@ -94,8 +94,13 @@ void SingleFileStorageManager::LoadDatabase() {
94
94
  table_io_manager = make_uniq<SingleFileTableIOManager>(*block_manager);
95
95
  return;
96
96
  }
97
-
98
- string wal_path = path + ".wal";
97
+ std::size_t question_mark_pos = path.find('?');
98
+ auto wal_path = path;
99
+ if (question_mark_pos != std::string::npos) {
100
+ wal_path.insert(question_mark_pos, ".wal");
101
+ } else {
102
+ wal_path += ".wal";
103
+ }
99
104
  auto &fs = FileSystem::Get(db);
100
105
  auto &config = DBConfig::Get(db);
101
106
  bool truncate_wal = false;
@@ -89,6 +89,10 @@ void ChunkConstantInfo::Serialize(Serializer &serializer) {
89
89
  serializer.Write<idx_t>(start);
90
90
  }
91
91
 
92
// Returns max_count when delete_id is below TRANSACTION_ID_START and 0 otherwise: a single delete id
// covers the whole chunk, so the answer is all-or-nothing.
// NOTE(review): presumably ids below TRANSACTION_ID_START denote committed deletes - confirm.
+ idx_t ChunkConstantInfo::GetCommittedDeletedCount(idx_t max_count) {
93
+ return delete_id < TRANSACTION_ID_START ? max_count : 0;
94
+ }
95
+
92
96
  unique_ptr<ChunkInfo> ChunkConstantInfo::Deserialize(Deserializer &source) {
93
97
  auto start = source.Read<idx_t>();
94
98
 
@@ -244,6 +248,19 @@ void ChunkVectorInfo::Serialize(Serializer &serializer) {
244
248
  serializer.WriteData(data_ptr_cast(deleted_tuples), sizeof(bool) * STANDARD_VECTOR_SIZE);
245
249
  }
246
250
 
251
// Counts, among the first max_count rows, those whose deleted[] entry is below TRANSACTION_ID_START.
// Returns 0 immediately when any_deleted is false.
// NOTE(review): presumably ids below TRANSACTION_ID_START denote committed deletes - confirm.
+ idx_t ChunkVectorInfo::GetCommittedDeletedCount(idx_t max_count) {
252
+ if (!any_deleted) {
253
+ return 0;
254
+ }
255
+ idx_t delete_count = 0;
256
+ for (idx_t i = 0; i < max_count; i++) {
257
+ if (deleted[i] < TRANSACTION_ID_START) {
258
+ delete_count++;
259
+ }
260
+ }
261
+ return delete_count;
262
+ }
263
+
247
264
  unique_ptr<ChunkInfo> ChunkVectorInfo::Deserialize(Deserializer &source) {
248
265
  auto start = source.Read<idx_t>();
249
266
 
@@ -67,6 +67,21 @@ void VersionNode::SetStart(idx_t start) {
67
67
  }
68
68
  }
69
69
 
70
// Sums GetCommittedDeletedCount over the per-vector info entries covering the first `count` rows.
// r is the row offset and i the matching index into info; slots without an info entry contribute 0.
+ idx_t VersionNode::GetCommittedDeletedCount(idx_t count) {
71
+ idx_t deleted_count = 0;
72
+ for (idx_t r = 0, i = 0; r < count; r += STANDARD_VECTOR_SIZE, i++) {
73
+ if (!info[i]) {
74
+ continue;
75
+ }
// the last vector may be partial, so clamp to the rows that remain
76
+ idx_t max_count = MinValue<idx_t>(STANDARD_VECTOR_SIZE, count - r);
// NOTE(review): the loop condition r < count already makes count - r at least 1, so this guard looks
// unreachable unless STANDARD_VECTOR_SIZE can be 0 - confirm before relying on it
77
+ if (max_count == 0) {
78
+ break;
79
+ }
80
+ deleted_count += info[i]->GetCommittedDeletedCount(max_count);
81
+ }
82
+ return deleted_count;
83
+ }
84
+
70
85
  RowGroup::~RowGroup() {
71
86
  }
72
87
 
@@ -112,10 +127,6 @@ ColumnData &RowGroup::GetColumn(storage_t c) {
112
127
  return *columns[c];
113
128
  }
114
129
 
115
- DatabaseInstance &RowGroup::GetDatabase() {
116
- return GetCollection().GetDatabase();
117
- }
118
-
119
130
  BlockManager &RowGroup::GetBlockManager() {
120
131
  return GetCollection().GetBlockManager();
121
132
  }
@@ -270,7 +281,7 @@ unique_ptr<RowGroup> RowGroup::AlterType(RowGroupCollection &new_collection, con
270
281
  }
271
282
 
272
283
  unique_ptr<RowGroup> RowGroup::AddColumn(RowGroupCollection &new_collection, ColumnDefinition &new_column,
273
- ExpressionExecutor &executor, Expression *default_value, Vector &result) {
284
+ ExpressionExecutor &executor, Expression &default_value, Vector &result) {
274
285
  Verify();
275
286
 
276
287
  // construct a new column data for the new column
@@ -285,10 +296,8 @@ unique_ptr<RowGroup> RowGroup::AddColumn(RowGroupCollection &new_collection, Col
285
296
  added_column->InitializeAppend(state);
286
297
  for (idx_t i = 0; i < rows_to_write; i += STANDARD_VECTOR_SIZE) {
287
298
  idx_t rows_in_this_vector = MinValue<idx_t>(rows_to_write - i, STANDARD_VECTOR_SIZE);
288
- if (default_value) {
289
- dummy_chunk.SetCardinality(rows_in_this_vector);
290
- executor.ExecuteExpression(dummy_chunk, result);
291
- }
299
+ dummy_chunk.SetCardinality(rows_in_this_vector);
300
+ executor.ExecuteExpression(dummy_chunk, result);
292
301
  added_column->Append(state, result, rows_in_this_vector);
293
302
  }
294
303
  }
@@ -796,6 +805,13 @@ RowGroupWriteData RowGroup::WriteToDisk(PartialBlockManager &manager,
796
805
  return result;
797
806
  }
798
807
 
808
// Returns true when the committed-deleted count reported by version_info equals the row group's count.
// Returns false when there is no version_info at all.
+ bool RowGroup::AllDeleted() {
809
+ if (!version_info) {
810
+ return false;
811
+ }
812
+ return version_info->GetCommittedDeletedCount(count) == count;
813
+ }
814
+
799
815
  RowGroupPointer RowGroup::Checkpoint(RowGroupWriter &writer, TableStatistics &global_stats) {
800
816
  RowGroupPointer row_group_pointer;
801
817
 
@@ -67,10 +67,6 @@ AttachedDatabase &RowGroupCollection::GetAttached() {
67
67
  return GetTableInfo().db;
68
68
  }
69
69
 
70
- DatabaseInstance &RowGroupCollection::GetDatabase() {
71
- return GetAttached().GetDatabase();
72
- }
73
-
74
70
  //===--------------------------------------------------------------------===//
75
71
  // Initialize
76
72
  //===--------------------------------------------------------------------===//
@@ -97,10 +93,6 @@ RowGroup *RowGroupCollection::GetRowGroup(int64_t index) {
97
93
  return (RowGroup *)row_groups->GetSegmentByIndex(index);
98
94
  }
99
95
 
100
- idx_t RowGroupCollection::RowGroupCount() {
101
- return row_groups->GetSegmentCount();
102
- }
103
-
104
96
  void RowGroupCollection::Verify() {
105
97
  #ifdef DEBUG
106
98
  idx_t current_total_rows = 0;
@@ -600,11 +592,24 @@ void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_i
600
592
  // Checkpoint
601
593
  //===--------------------------------------------------------------------===//
602
594
  // Checkpoints every row group of the collection. The removed (-) form simply iterated the segments and
  // wrote each one. The added (+) form moves the segments out, and while holding the segment lock either
  // drops a row group (only when the table has no indexes and the group reports AllDeleted) or moves it
  // to its new start offset, checkpoints it and appends it back. total_rows is set to the final offset.
  void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &global_stats) {
603
- for (auto &row_group : row_groups->Segments()) {
604
- auto rowg_writer = writer.GetRowGroupWriter(row_group);
605
- auto pointer = row_group.Checkpoint(*rowg_writer, global_stats);
606
- writer.AddRowGroup(std::move(pointer), std::move(rowg_writer));
// dropping fully deleted row groups is only attempted when the table has no indexes
595
+ bool can_vacuum_deletes = info->indexes.Empty();
596
+ idx_t start = this->row_start;
597
+ auto segments = row_groups->MoveSegments();
598
+ auto l = row_groups->Lock();
599
+ for (auto &entry : segments) {
600
+ auto &row_group = *entry.node;
601
+ if (can_vacuum_deletes && row_group.AllDeleted()) {
// dropped row groups are not appended back and do not advance start
602
+ row_group.CommitDrop();
603
+ continue;
604
+ }
605
+ row_group.MoveToCollection(*this, start);
606
+ auto row_group_writer = writer.GetRowGroupWriter(row_group);
607
+ auto pointer = row_group.Checkpoint(*row_group_writer, global_stats);
608
+ writer.AddRowGroup(std::move(pointer), std::move(row_group_writer));
609
+ row_groups->AppendSegment(l, std::move(entry.node));
// NOTE(review): row_group is read after entry.node was moved from; this presumably stays valid because
// ownership passes to row_groups rather than the object being destroyed - confirm
610
+ start += row_group.count;
607
611
  }
612
+ total_rows = start;
608
613
  }
609
614
 
610
615
  //===--------------------------------------------------------------------===//
@@ -637,7 +642,7 @@ vector<ColumnSegmentInfo> RowGroupCollection::GetColumnSegmentInfo() {
637
642
  // Alter
638
643
  //===--------------------------------------------------------------------===//
639
644
  shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &context, ColumnDefinition &new_column,
640
- Expression *default_value) {
645
+ Expression &default_value) {
641
646
  idx_t new_column_idx = types.size();
642
647
  auto new_types = types;
643
648
  new_types.push_back(new_column.GetType());
@@ -647,11 +652,7 @@ shared_ptr<RowGroupCollection> RowGroupCollection::AddColumn(ClientContext &cont
647
652
  ExpressionExecutor executor(context);
648
653
  DataChunk dummy_chunk;
649
654
  Vector default_vector(new_column.GetType());
650
- if (!default_value) {
651
- FlatVector::Validity(default_vector).SetAllInvalid(STANDARD_VECTOR_SIZE);
652
- } else {
653
- executor.AddExpression(*default_value);
654
- }
655
+ executor.AddExpression(default_value);
655
656
 
656
657
  result->stats.InitializeAddColumn(stats, new_column.GetType());
657
658
  auto &new_column_stats = result->stats.GetStats(new_column_idx);
@@ -80,7 +80,7 @@ namespace duckdb_moodycamel { namespace details {
80
80
  static const thread_id_t invalid_thread_id2 = 0xFFFFFFFFU; // Not technically guaranteed to be invalid, but is never used in practice. Note that all Win32 thread IDs are presently multiples of 4.
81
81
  static inline thread_id_t thread_id() { return static_cast<thread_id_t>(::GetCurrentThreadId()); }
82
82
  } }
83
- #elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE)
83
+ #elif defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || (defined(__APPLE__) && TARGET_OS_IPHONE) || defined(__MVS__)
84
84
  namespace duckdb_moodycamel { namespace details {
85
85
  static_assert(sizeof(std::thread::id) == 4 || sizeof(std::thread::id) == 8, "std::thread::id is expected to be either 4 or 8 bytes");
86
86
 
@@ -194,7 +194,7 @@ namespace duckdb_moodycamel { namespace details {
194
194
  // VS2013 doesn't support `thread_local`, and MinGW-w64 w/ POSIX threading has a crippling bug: http://sourceforge.net/p/mingw-w64/bugs/445
195
195
  // g++ <=4.7 doesn't support thread_local either.
196
196
  // Finally, iOS/ARM doesn't have support for it either, and g++/ARM allows it to compile but it's unconfirmed to actually work
197
- #if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
197
+ #if (!defined(_MSC_VER) || _MSC_VER >= 1900) && (!defined(__MINGW32__) && !defined(__MINGW64__) || !defined(__WINPTHREADS_VERSION)) && (!defined(__GNUC__) || __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) && (!defined(__APPLE__) || !TARGET_OS_IPHONE) && !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(__MVS__)
198
198
  // Assume `thread_local` is fully supported in all other C++11 compilers/platforms
199
199
  //#define MOODYCAMEL_CPP11_THREAD_LOCAL_SUPPORTED // always disabled for now since several users report having problems with it on
200
200
  #endif
@@ -27,6 +27,8 @@ extern "C" {
27
27
  #elif defined(__unix__)
28
28
  #include <semaphore.h>
29
29
  #include <chrono>
30
+ #elif defined(__MVS__)
31
+ #include <zos-semaphore.h>
30
32
  #endif
31
33
 
32
34
  namespace duckdb_moodycamel
@@ -254,6 +256,80 @@ public:
254
256
  }
255
257
  }
256
258
  };
259
+ #elif defined(__MVS__)
260
+ //---------------------------------------------------------
261
+ // Semaphore (MVS aka z/OS)
262
+ //---------------------------------------------------------
263
// Thin wrapper around a sem_t for the __MVS__ branch (sem_* functions come from <zos-semaphore.h>).
// Copying is disabled. wait(), try_wait() and timed_wait() retry while the call fails with EINTR and
// return true only when the underlying call returned 0; signal() retries sem_post until it succeeds.
+ class Semaphore
264
+ {
265
+ private:
266
+ sem_t m_sema;
267
+
268
+ Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
269
+ Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
270
+
271
+ public:
// Initializes the semaphore with initialCount; failures are only caught by assert
272
+ Semaphore(int initialCount = 0)
273
+ {
274
+ assert(initialCount >= 0);
275
+ int rc = sem_init(&m_sema, 0, initialCount);
276
+ assert(rc == 0);
// silences the unused-variable warning when assert is compiled out
277
+ (void)rc;
278
+ }
279
+
280
+ ~Semaphore()
281
+ {
282
+ sem_destroy(&m_sema);
283
+ }
284
+
// Blocks until the semaphore is acquired; returns false if sem_wait fails for a reason other than EINTR
285
+ bool wait()
286
+ {
287
+ // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
288
+ int rc;
289
+ do {
290
+ rc = sem_wait(&m_sema);
291
+ } while (rc == -1 && errno == EINTR);
292
+ return rc == 0;
293
+ }
294
+
// Attempts to acquire the semaphore via sem_trywait; returns true only when that call returns 0
295
+ bool try_wait()
296
+ {
297
+ int rc;
298
+ do {
299
+ rc = sem_trywait(&m_sema);
300
+ } while (rc == -1 && errno == EINTR);
301
+ return rc == 0;
302
+ }
303
+
// Waits up to usecs microseconds.
// NOTE(review): ts is filled with the timeout as a duration (no current time is added). POSIX
// sem_timedwait expects an absolute deadline - confirm that the zos-semaphore.h variant takes a
// relative timeout.
304
+ bool timed_wait(std::uint64_t usecs)
305
+ {
306
+ struct timespec ts;
307
+ const int usecs_in_1_sec = 1000000;
// NOTE(review): nsecs_in_1_sec is declared but never used in this branch
308
+ const int nsecs_in_1_sec = 1000000000;
309
+
310
+ ts.tv_sec = usecs / usecs_in_1_sec;
311
+ ts.tv_nsec = (usecs % usecs_in_1_sec) * 1000;
312
+
313
+ int rc;
314
+ do {
315
+ rc = sem_timedwait(&m_sema, &ts);
316
+ } while (rc == -1 && errno == EINTR);
317
+ return rc == 0;
318
+ }
319
+
// Posts once, retrying for as long as sem_post reports failure
320
+ void signal()
321
+ {
322
+ while (sem_post(&m_sema) == -1);
323
+ }
324
+
// Posts count times, each post retried until it succeeds
325
+ void signal(int count)
326
+ {
327
+ while (count-- > 0)
328
+ {
329
+ while (sem_post(&m_sema) == -1);
330
+ }
331
+ }
332
+ };
257
333
  #else
258
334
  #error Unsupported platform! (No semaphore wrapper available)
259
335
  #endif
@@ -130,6 +130,8 @@ from_chars_result from_chars(const char *first, const char *last,
130
130
  #include <machine/endian.h>
131
131
  #elif defined(sun) || defined(__sun)
132
132
  #include <sys/byteorder.h>
133
+ #elif defined(__MVS__)
134
+ #include <sys/endian.h>
133
135
  #else
134
136
  #include <endian.h>
135
137
  #endif
@@ -183,7 +183,16 @@ using socket_t = SOCKET;
183
183
 
184
184
  #include <arpa/inet.h>
185
185
  #include <cstring>
186
+ #ifndef __MVS__
186
187
  #include <ifaddrs.h>
188
+ #endif
189
+ #ifdef __MVS__
190
+ #include <net/if.h>
191
+ #include <strings.h>
192
+ #ifndef NI_MAXHOST
193
+ #define NI_MAXHOST 1025
194
+ #endif
195
+ #endif
187
196
  #include <netdb.h>
188
197
  #include <netinet/in.h>
189
198
  #ifdef __linux__
@@ -2668,7 +2677,7 @@ inline bool bind_ip_address(socket_t sock, const char *host) {
2668
2677
  return ret;
2669
2678
  }
2670
2679
 
2671
- #if !defined _WIN32 && !defined ANDROID
2680
+ #if !defined _WIN32 && !defined ANDROID && !defined __MVS__
2672
2681
  #define USE_IF2IP
2673
2682
  #endif
2674
2683
 
@@ -1058,10 +1058,19 @@ typedef struct PGOnConflictClause {
1058
1058
  *
1059
1059
  * We don't currently support the SEARCH or CYCLE clause.
1060
1060
  */
1061
+
1062
/* Materialization option of a common table expression; stored in PGCommonTableExpr.ctematerialized. */
+ typedef enum PGCTEMaterialize
1063
+ {
1064
+ PGCTEMaterializeDefault, /* no option specified */
1065
+ PGCTEMaterializeAlways, /* MATERIALIZED */
1066
+ PGCTEMaterializeNever /* NOT MATERIALIZED */
1067
+ } PGCTEMaterialize;
1068
+
1061
1069
  typedef struct PGCommonTableExpr {
1062
1070
  PGNodeTag type;
1063
1071
  char *ctename; /* query name (never qualified) */
1064
1072
  PGList *aliascolnames; /* optional list of column names */
1073
+ PGCTEMaterialize ctematerialized; /* is this an optimization fence? */
1065
1074
  /* SelectStmt/InsertStmt/etc before parse analysis, PGQuery afterwards: */
1066
1075
  PGNode *ctequery; /* the CTE's subquery */
1067
1076
  int location; /* token location, or -1 if unknown */
@@ -1049,6 +1049,7 @@ typedef union YYSTYPE
1049
1049
  PGAlias *alias;
1050
1050
  PGRangeVar *range;
1051
1051
  PGIntoClause *into;
1052
+ PGCTEMaterialize ctematerialize;
1052
1053
  PGWithClause *with;
1053
1054
  PGInferClause *infer;
1054
1055
  PGOnConflictClause *onconflict;
@@ -1068,7 +1069,7 @@ typedef union YYSTYPE
1068
1069
  PGInsertColumnOrder bynameorposition;
1069
1070
  }
1070
1071
  /* Line 1529 of yacc.c. */
1071
- #line 1072 "third_party/libpg_query/grammar/grammar_out.hpp"
1072
+ #line 1073 "third_party/libpg_query/grammar/grammar_out.hpp"
1072
1073
  YYSTYPE;
1073
1074
  # define yystype YYSTYPE /* obsolescent; will be withdrawn */
1074
1075
  # define YYSTYPE_IS_DECLARED 1