duckdb 0.8.2-dev157.0 → 0.8.2-dev1573.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (493)
  1. package/binding.gyp +15 -12
  2. package/binding.gyp.in +1 -1
  3. package/configure.py +1 -1
  4. package/duckdb_extension_config.cmake +10 -0
  5. package/package.json +1 -1
  6. package/src/duckdb/extension/icu/icu-dateadd.cpp +2 -2
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +1 -1
  8. package/src/duckdb/extension/icu/icu-datepart.cpp +2 -2
  9. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  10. package/src/duckdb/extension/icu/icu-datetrunc.cpp +1 -1
  11. package/src/duckdb/extension/icu/icu-list-range.cpp +1 -1
  12. package/src/duckdb/extension/icu/icu-makedate.cpp +7 -0
  13. package/src/duckdb/extension/icu/icu-strptime.cpp +4 -4
  14. package/src/duckdb/extension/icu/icu-table-range.cpp +5 -5
  15. package/src/duckdb/extension/icu/icu-timebucket.cpp +16 -16
  16. package/src/duckdb/extension/icu/icu-timezone.cpp +8 -8
  17. package/src/duckdb/extension/icu/icu_extension.cpp +3 -3
  18. package/src/duckdb/extension/json/include/json_common.hpp +47 -231
  19. package/src/duckdb/extension/json/include/json_executors.hpp +49 -13
  20. package/src/duckdb/extension/json/include/json_functions.hpp +2 -1
  21. package/src/duckdb/extension/json/json_common.cpp +272 -40
  22. package/src/duckdb/extension/json/json_functions/json_structure.cpp +1 -1
  23. package/src/duckdb/extension/json/json_functions/json_transform.cpp +17 -37
  24. package/src/duckdb/extension/json/json_functions/json_type.cpp +1 -1
  25. package/src/duckdb/extension/json/json_functions.cpp +24 -24
  26. package/src/duckdb/extension/json/json_scan.cpp +3 -6
  27. package/src/duckdb/extension/parquet/column_reader.cpp +19 -21
  28. package/src/duckdb/extension/parquet/column_writer.cpp +77 -61
  29. package/src/duckdb/extension/parquet/include/cast_column_reader.hpp +2 -2
  30. package/src/duckdb/extension/parquet/include/column_reader.hpp +14 -16
  31. package/src/duckdb/extension/parquet/include/column_writer.hpp +9 -7
  32. package/src/duckdb/extension/parquet/include/list_column_reader.hpp +2 -2
  33. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +3 -3
  34. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +3 -3
  35. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  36. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +2 -2
  37. package/src/duckdb/extension/parquet/include/parquet_support.hpp +9 -11
  38. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +24 -5
  39. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -1
  40. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +2 -3
  41. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +2 -2
  42. package/src/duckdb/extension/parquet/parquet_extension.cpp +192 -20
  43. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -6
  44. package/src/duckdb/extension/parquet/parquet_statistics.cpp +7 -6
  45. package/src/duckdb/extension/parquet/parquet_writer.cpp +79 -16
  46. package/src/duckdb/extension/parquet/zstd_file_system.cpp +2 -2
  47. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -4
  49. package/src/duckdb/src/catalog/default/default_functions.cpp +16 -0
  50. package/src/duckdb/src/common/adbc/adbc.cpp +75 -10
  51. package/src/duckdb/src/common/adbc/driver_manager.cpp +6 -11
  52. package/src/duckdb/src/common/allocator.cpp +14 -2
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +5 -11
  54. package/src/duckdb/src/common/assert.cpp +3 -0
  55. package/src/duckdb/src/common/enum_util.cpp +4619 -4446
  56. package/src/duckdb/src/common/enums/logical_operator_type.cpp +4 -0
  57. package/src/duckdb/src/common/enums/optimizer_type.cpp +2 -0
  58. package/src/duckdb/src/common/enums/physical_operator_type.cpp +4 -0
  59. package/src/duckdb/src/common/exception.cpp +2 -2
  60. package/src/duckdb/src/common/extra_type_info.cpp +506 -0
  61. package/src/duckdb/src/common/file_system.cpp +19 -0
  62. package/src/duckdb/src/common/hive_partitioning.cpp +10 -6
  63. package/src/duckdb/src/common/local_file_system.cpp +14 -14
  64. package/src/duckdb/src/common/multi_file_reader.cpp +184 -20
  65. package/src/duckdb/src/common/operator/cast_operators.cpp +35 -1
  66. package/src/duckdb/src/common/radix_partitioning.cpp +26 -8
  67. package/src/duckdb/src/common/re2_regex.cpp +1 -1
  68. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  69. package/src/duckdb/src/common/sort/merge_sorter.cpp +9 -16
  70. package/src/duckdb/src/common/sort/partition_state.cpp +44 -11
  71. package/src/duckdb/src/common/types/batched_data_collection.cpp +7 -2
  72. package/src/duckdb/src/common/types/bit.cpp +51 -0
  73. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +9 -6
  74. package/src/duckdb/src/common/types/column/column_data_collection.cpp +17 -2
  75. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +15 -6
  76. package/src/duckdb/src/common/types/column/partitioned_column_data.cpp +2 -2
  77. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  78. package/src/duckdb/src/common/types/date.cpp +9 -0
  79. package/src/duckdb/src/common/types/list_segment.cpp +24 -74
  80. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +3 -9
  81. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +2 -0
  82. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  83. package/src/duckdb/src/common/types/uuid.cpp +2 -2
  84. package/src/duckdb/src/common/types/validity_mask.cpp +33 -0
  85. package/src/duckdb/src/common/types.cpp +8 -655
  86. package/src/duckdb/src/common/virtual_file_system.cpp +138 -1
  87. package/src/duckdb/src/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -0
  88. package/src/duckdb/src/core_functions/aggregate/nested/list.cpp +2 -2
  89. package/src/duckdb/src/core_functions/aggregate/regression/regr_avg.cpp +4 -4
  90. package/src/duckdb/src/core_functions/aggregate/regression/regr_intercept.cpp +4 -4
  91. package/src/duckdb/src/core_functions/aggregate/regression/regr_r2.cpp +5 -4
  92. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxx_syy.cpp +8 -8
  93. package/src/duckdb/src/core_functions/aggregate/regression/regr_sxy.cpp +4 -3
  94. package/src/duckdb/src/core_functions/function_list.cpp +4 -2
  95. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +208 -42
  96. package/src/duckdb/src/core_functions/scalar/date/epoch.cpp +10 -24
  97. package/src/duckdb/src/core_functions/scalar/date/make_date.cpp +19 -4
  98. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +4 -2
  99. package/src/duckdb/src/execution/aggregate_hashtable.cpp +34 -18
  100. package/src/duckdb/src/execution/expression_executor.cpp +1 -1
  101. package/src/duckdb/src/execution/index/art/art.cpp +149 -139
  102. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +1 -1
  103. package/src/duckdb/src/execution/index/art/iterator.cpp +129 -207
  104. package/src/duckdb/src/execution/index/art/leaf.cpp +8 -37
  105. package/src/duckdb/src/execution/index/art/node.cpp +113 -120
  106. package/src/duckdb/src/execution/index/art/node16.cpp +1 -10
  107. package/src/duckdb/src/execution/index/art/node256.cpp +1 -9
  108. package/src/duckdb/src/execution/index/art/node4.cpp +12 -13
  109. package/src/duckdb/src/execution/index/art/node48.cpp +1 -11
  110. package/src/duckdb/src/execution/index/art/prefix.cpp +228 -350
  111. package/src/duckdb/src/execution/join_hashtable.cpp +4 -4
  112. package/src/duckdb/src/execution/operator/aggregate/aggregate_object.cpp +1 -0
  113. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +8 -3
  114. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +32 -22
  115. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +512 -300
  116. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +4 -3
  117. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -5
  118. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +444 -284
  119. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +1 -1
  120. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +21 -10
  121. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +28 -12
  122. package/src/duckdb/src/execution/operator/join/physical_join.cpp +1 -1
  123. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +23 -4
  124. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +41 -5
  125. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +100 -13
  126. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +1 -1
  127. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +20 -0
  128. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +48 -0
  129. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +2 -3
  130. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +6 -4
  131. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +3 -3
  132. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -1
  133. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +2 -1
  134. package/src/duckdb/src/execution/operator/scan/physical_column_data_scan.cpp +19 -0
  135. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +7 -2
  136. package/src/duckdb/src/execution/operator/set/physical_cte.cpp +160 -0
  137. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +15 -5
  138. package/src/duckdb/src/execution/partitionable_hashtable.cpp +41 -6
  139. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +30 -5
  140. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +43 -10
  141. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +56 -33
  142. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +17 -13
  143. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +33 -0
  144. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +2 -2
  145. package/src/duckdb/src/execution/physical_plan/plan_recursive_cte.cpp +25 -4
  146. package/src/duckdb/src/execution/physical_plan_generator.cpp +4 -0
  147. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +290 -43
  148. package/src/duckdb/src/execution/window_segment_tree.cpp +286 -129
  149. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +2 -1
  150. package/src/duckdb/src/function/cast/bit_cast.cpp +34 -2
  151. package/src/duckdb/src/function/cast/blob_cast.cpp +3 -0
  152. package/src/duckdb/src/function/cast/numeric_casts.cpp +2 -0
  153. package/src/duckdb/src/function/function.cpp +3 -1
  154. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  155. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +212 -0
  156. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +249 -0
  157. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +29 -0
  158. package/src/duckdb/src/function/scalar/list/list_resize.cpp +162 -0
  159. package/src/duckdb/src/function/scalar/nested_functions.cpp +1 -0
  160. package/src/duckdb/src/function/scalar/string/like.cpp +12 -4
  161. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +12 -5
  162. package/src/duckdb/src/function/table/copy_csv.cpp +8 -1
  163. package/src/duckdb/src/function/table/read_csv.cpp +100 -17
  164. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  165. package/src/duckdb/src/function/table/table_scan.cpp +9 -0
  166. package/src/duckdb/src/function/table/version/pragma_version.cpp +46 -2
  167. package/src/duckdb/src/include/duckdb/common/adbc/adbc.hpp +1 -0
  168. package/src/duckdb/src/include/duckdb/common/allocator.hpp +2 -0
  169. package/src/duckdb/src/include/duckdb/common/bswap.hpp +42 -0
  170. package/src/duckdb/src/include/duckdb/common/dl.hpp +3 -1
  171. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +616 -584
  172. package/src/duckdb/src/include/duckdb/common/enums/cte_materialize.hpp +21 -0
  173. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +2 -1
  174. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +2 -0
  175. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +2 -0
  176. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +2 -0
  177. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +27 -0
  178. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +219 -0
  179. package/src/duckdb/src/include/duckdb/common/file_system.hpp +2 -0
  180. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  181. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +6 -4
  182. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +10 -42
  183. package/src/duckdb/src/include/duckdb/common/mutex.hpp +3 -0
  184. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +43 -3
  185. package/src/duckdb/src/include/duckdb/common/operator/numeric_cast.hpp +10 -0
  186. package/src/duckdb/src/include/duckdb/common/radix.hpp +9 -20
  187. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +6 -21
  188. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +3 -3
  189. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -0
  190. package/src/duckdb/src/include/duckdb/common/string_util.hpp +11 -0
  191. package/src/duckdb/src/include/duckdb/common/types/batched_data_collection.hpp +3 -1
  192. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +81 -0
  193. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -1
  194. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +6 -1
  195. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +1 -1
  196. package/src/duckdb/src/include/duckdb/common/types/column/column_data_scan_states.hpp +3 -1
  197. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +1 -1
  198. package/src/duckdb/src/include/duckdb/common/types/date.hpp +7 -5
  199. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +6 -8
  200. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -1
  201. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -0
  202. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -0
  203. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +9 -0
  204. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -15
  205. package/src/duckdb/src/include/duckdb/common/virtual_file_system.hpp +38 -97
  206. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/corr.hpp +4 -4
  207. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic/covar.hpp +3 -1
  208. package/src/duckdb/src/include/duckdb/core_functions/aggregate/algebraic_functions.hpp +3 -1
  209. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +3 -1
  210. package/src/duckdb/src/include/duckdb/core_functions/aggregate/holistic_functions.hpp +3 -1
  211. package/src/duckdb/src/include/duckdb/core_functions/aggregate/nested_functions.hpp +3 -1
  212. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_count.hpp +1 -0
  213. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression/regr_slope.hpp +3 -3
  214. package/src/duckdb/src/include/duckdb/core_functions/aggregate/regression_functions.hpp +3 -1
  215. package/src/duckdb/src/include/duckdb/core_functions/scalar/bit_functions.hpp +3 -1
  216. package/src/duckdb/src/include/duckdb/core_functions/scalar/blob_functions.hpp +3 -1
  217. package/src/duckdb/src/include/duckdb/core_functions/scalar/date_functions.hpp +31 -11
  218. package/src/duckdb/src/include/duckdb/core_functions/scalar/enum_functions.hpp +3 -1
  219. package/src/duckdb/src/include/duckdb/core_functions/scalar/generic_functions.hpp +3 -1
  220. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +3 -1
  221. package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +3 -1
  222. package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +3 -1
  223. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +3 -1
  224. package/src/duckdb/src/include/duckdb/core_functions/scalar/random_functions.hpp +3 -1
  225. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +3 -1
  226. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +3 -1
  227. package/src/duckdb/src/include/duckdb/core_functions/scalar/union_functions.hpp +3 -1
  228. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +21 -3
  229. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  230. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +4 -5
  231. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +31 -27
  232. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -14
  233. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +4 -10
  234. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +3 -6
  235. package/src/duckdb/src/include/duckdb/execution/index/art/node256.hpp +3 -6
  236. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +5 -8
  237. package/src/duckdb/src/include/duckdb/execution/index/art/node48.hpp +3 -6
  238. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +63 -52
  239. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +3 -10
  240. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  241. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +1 -1
  242. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +12 -1
  243. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +2 -2
  244. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +4 -3
  245. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +8 -1
  246. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +36 -0
  247. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +1 -1
  248. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_column_data_scan.hpp +10 -0
  249. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +5 -1
  250. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +62 -0
  251. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_recursive_cte.hpp +8 -2
  252. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +5 -1
  253. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -0
  254. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +10 -3
  255. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +51 -40
  256. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +1 -1
  257. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -2
  258. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +1 -0
  259. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +49 -0
  260. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  261. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +5 -0
  262. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -0
  263. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  264. package/src/duckdb/src/include/duckdb/main/client_config.hpp +5 -0
  265. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  266. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -2
  267. package/src/duckdb/src/include/duckdb/main/relation/cross_product_relation.hpp +4 -1
  268. package/src/duckdb/src/include/duckdb/main/relation/join_relation.hpp +5 -2
  269. package/src/duckdb/src/include/duckdb/main/relation.hpp +4 -2
  270. package/src/duckdb/src/include/duckdb/main/settings.hpp +39 -1
  271. package/src/duckdb/src/include/duckdb/optimizer/column_binding_replacer.hpp +47 -0
  272. package/src/duckdb/src/include/duckdb/optimizer/compressed_materialization.hpp +132 -0
  273. package/src/duckdb/src/include/duckdb/optimizer/deliminator.hpp +13 -16
  274. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +3 -0
  275. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +1 -1
  276. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +10 -1
  277. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp +1 -1
  278. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +1 -1
  279. package/src/duckdb/src/include/duckdb/optimizer/join_order/query_graph.hpp +3 -0
  280. package/src/duckdb/src/include/duckdb/optimizer/matcher/set_matcher.hpp +13 -0
  281. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +3 -0
  282. package/src/duckdb/src/include/duckdb/optimizer/remove_duplicate_groups.hpp +40 -0
  283. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +11 -3
  284. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +2 -0
  285. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +2 -0
  286. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +5 -0
  287. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  288. package/src/duckdb/src/include/duckdb/parser/expression/between_expression.hpp +3 -0
  289. package/src/duckdb/src/include/duckdb/parser/expression/cast_expression.hpp +3 -0
  290. package/src/duckdb/src/include/duckdb/parser/expression/collate_expression.hpp +3 -0
  291. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  292. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +3 -0
  293. package/src/duckdb/src/include/duckdb/parser/expression/constant_expression.hpp +3 -0
  294. package/src/duckdb/src/include/duckdb/parser/expression/default_expression.hpp +1 -0
  295. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +3 -0
  296. package/src/duckdb/src/include/duckdb/parser/expression/lambda_expression.hpp +3 -0
  297. package/src/duckdb/src/include/duckdb/parser/expression/positional_reference_expression.hpp +3 -0
  298. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +3 -0
  299. package/src/duckdb/src/include/duckdb/parser/query_node/cte_node.hpp +54 -0
  300. package/src/duckdb/src/include/duckdb/parser/query_node/list.hpp +1 -0
  301. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  302. package/src/duckdb/src/include/duckdb/parser/tableref/emptytableref.hpp +1 -0
  303. package/src/duckdb/src/include/duckdb/parser/tableref/joinref.hpp +1 -1
  304. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +3 -0
  305. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -0
  306. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +15 -8
  307. package/src/duckdb/src/include/duckdb/planner/binder.hpp +12 -5
  308. package/src/duckdb/src/include/duckdb/planner/bound_tokens.hpp +1 -0
  309. package/src/duckdb/src/include/duckdb/planner/column_binding.hpp +4 -0
  310. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +3 -3
  311. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +0 -2
  312. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  313. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +2 -1
  314. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -5
  315. package/src/duckdb/src/include/duckdb/planner/operator/logical_cteref.hpp +7 -2
  316. package/src/duckdb/src/include/duckdb/planner/operator/logical_dependent_join.hpp +43 -0
  317. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +4 -0
  318. package/src/duckdb/src/include/duckdb/planner/operator/logical_materialized_cte.hpp +49 -0
  319. package/src/duckdb/src/include/duckdb/planner/operator/logical_recursive_cte.hpp +5 -4
  320. package/src/duckdb/src/include/duckdb/planner/query_node/bound_cte_node.hpp +44 -0
  321. package/src/duckdb/src/include/duckdb/planner/query_node/list.hpp +1 -0
  322. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +2 -2
  323. package/src/duckdb/src/include/duckdb/planner/subquery/has_correlated_expressions.hpp +4 -1
  324. package/src/duckdb/src/include/duckdb/planner/subquery/recursive_dependent_join_planner.hpp +31 -0
  325. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_correlated_expressions.hpp +8 -2
  326. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +5 -2
  327. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -1
  328. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -3
  329. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -1
  330. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +22 -0
  331. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +2 -0
  332. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +4 -0
  333. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -0
  334. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +3 -2
  335. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +1 -3
  336. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +2 -3
  337. package/src/duckdb/src/include/duckdb.h +28 -0
  338. package/src/duckdb/src/main/capi/arrow-c.cpp +155 -1
  339. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +1 -1
  340. package/src/duckdb/src/main/config.cpp +4 -0
  341. package/src/duckdb/src/main/database.cpp +1 -1
  342. package/src/duckdb/src/main/extension/extension_helper.cpp +96 -89
  343. package/src/duckdb/src/main/extension/extension_install.cpp +9 -0
  344. package/src/duckdb/src/main/extension/extension_load.cpp +10 -1
  345. package/src/duckdb/src/main/relation/cross_product_relation.cpp +4 -3
  346. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  347. package/src/duckdb/src/main/relation.cpp +6 -5
  348. package/src/duckdb/src/main/settings/settings.cpp +79 -18
  349. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +43 -0
  350. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -2
  351. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +140 -0
  352. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +42 -0
  353. package/src/duckdb/src/optimizer/compressed_materialization/compress_order.cpp +65 -0
  354. package/src/duckdb/src/optimizer/compressed_materialization.cpp +478 -0
  355. package/src/duckdb/src/optimizer/deliminator.cpp +176 -321
  356. package/src/duckdb/src/optimizer/filter_pushdown.cpp +9 -0
  357. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +7 -0
  358. package/src/duckdb/src/optimizer/join_order/join_node.cpp +2 -2
  359. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +113 -82
  360. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +2 -6
  361. package/src/duckdb/src/optimizer/join_order/query_graph.cpp +22 -14
  362. package/src/duckdb/src/optimizer/optimizer.cpp +51 -14
  363. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +5 -5
  364. package/src/duckdb/src/optimizer/pushdown/pushdown_get.cpp +0 -1
  365. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +127 -0
  366. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +4 -0
  367. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +154 -15
  368. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +65 -8
  369. package/src/duckdb/src/optimizer/statistics/operator/propagate_order.cpp +1 -1
  370. package/src/duckdb/src/optimizer/statistics_propagator.cpp +7 -5
  371. package/src/duckdb/src/optimizer/topn_optimizer.cpp +20 -10
  372. package/src/duckdb/src/parallel/executor.cpp +15 -0
  373. package/src/duckdb/src/parallel/task_scheduler.cpp +11 -2
  374. package/src/duckdb/src/parser/common_table_expression_info.cpp +2 -0
  375. package/src/duckdb/src/parser/expression/between_expression.cpp +3 -15
  376. package/src/duckdb/src/parser/expression/case_expression.cpp +0 -13
  377. package/src/duckdb/src/parser/expression/cast_expression.cpp +3 -14
  378. package/src/duckdb/src/parser/expression/collate_expression.cpp +3 -13
  379. package/src/duckdb/src/parser/expression/columnref_expression.cpp +3 -12
  380. package/src/duckdb/src/parser/expression/comparison_expression.cpp +3 -13
  381. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +0 -12
  382. package/src/duckdb/src/parser/expression/constant_expression.cpp +3 -11
  383. package/src/duckdb/src/parser/expression/default_expression.cpp +0 -4
  384. package/src/duckdb/src/parser/expression/function_expression.cpp +3 -32
  385. package/src/duckdb/src/parser/expression/lambda_expression.cpp +4 -14
  386. package/src/duckdb/src/parser/expression/operator_expression.cpp +0 -12
  387. package/src/duckdb/src/parser/expression/parameter_expression.cpp +0 -12
  388. package/src/duckdb/src/parser/expression/positional_reference_expression.cpp +4 -11
  389. package/src/duckdb/src/parser/expression/star_expression.cpp +0 -19
  390. package/src/duckdb/src/parser/expression/subquery_expression.cpp +0 -18
  391. package/src/duckdb/src/parser/expression/window_expression.cpp +3 -39
  392. package/src/duckdb/src/parser/parsed_expression.cpp +0 -70
  393. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -0
  394. package/src/duckdb/src/parser/parser.cpp +8 -2
  395. package/src/duckdb/src/parser/query_node/cte_node.cpp +58 -0
  396. package/src/duckdb/src/parser/query_node/recursive_cte_node.cpp +0 -19
  397. package/src/duckdb/src/parser/query_node/select_node.cpp +0 -29
  398. package/src/duckdb/src/parser/query_node/set_operation_node.cpp +0 -15
  399. package/src/duckdb/src/parser/query_node.cpp +15 -37
  400. package/src/duckdb/src/parser/result_modifier.cpp +0 -74
  401. package/src/duckdb/src/parser/tableref/basetableref.cpp +0 -19
  402. package/src/duckdb/src/parser/tableref/emptytableref.cpp +0 -4
  403. package/src/duckdb/src/parser/tableref/expressionlistref.cpp +0 -15
  404. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -23
  405. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -23
  406. package/src/duckdb/src/parser/tableref/subqueryref.cpp +3 -13
  407. package/src/duckdb/src/parser/tableref/table_function.cpp +0 -15
  408. package/src/duckdb/src/parser/tableref.cpp +0 -44
  409. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +55 -3
  410. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -0
  411. package/src/duckdb/src/parser/transform/expression/transform_multi_assign_reference.cpp +44 -0
  412. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +19 -1
  413. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +13 -0
  414. package/src/duckdb/src/parser/transform/statement/transform_delete.cpp +6 -1
  415. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +6 -1
  416. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +7 -2
  417. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +14 -11
  418. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +11 -2
  419. package/src/duckdb/src/parser/transform/statement/transform_update.cpp +6 -1
  420. package/src/duckdb/src/parser/transformer.cpp +15 -0
  421. package/src/duckdb/src/planner/binder/query_node/bind_cte_node.cpp +64 -0
  422. package/src/duckdb/src/planner/binder/query_node/plan_cte_node.cpp +26 -0
  423. package/src/duckdb/src/planner/binder/query_node/plan_recursive_cte_node.cpp +5 -5
  424. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +4 -4
  425. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +32 -29
  426. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +5 -4
  427. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +11 -2
  428. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +32 -5
  429. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +116 -49
  430. package/src/duckdb/src/planner/binder/tableref/plan_cteref.cpp +1 -1
  431. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +64 -26
  432. package/src/duckdb/src/planner/binder/tableref/plan_subqueryref.cpp +3 -3
  433. package/src/duckdb/src/planner/binder.cpp +44 -31
  434. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +1 -1
  435. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +4 -31
  436. package/src/duckdb/src/planner/expression_binder.cpp +3 -0
  437. package/src/duckdb/src/planner/expression_iterator.cpp +6 -0
  438. package/src/duckdb/src/planner/logical_operator.cpp +5 -0
  439. package/src/duckdb/src/planner/logical_operator_visitor.cpp +2 -0
  440. package/src/duckdb/src/planner/operator/logical_cteref.cpp +3 -1
  441. package/src/duckdb/src/planner/operator/logical_dependent_join.cpp +26 -0
  442. package/src/duckdb/src/planner/operator/logical_get.cpp +9 -4
  443. package/src/duckdb/src/planner/operator/logical_materialized_cte.cpp +21 -0
  444. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +90 -38
  445. package/src/duckdb/src/planner/subquery/has_correlated_expressions.cpp +22 -7
  446. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +65 -7
  447. package/src/duckdb/src/storage/arena_allocator.cpp +1 -2
  448. package/src/duckdb/src/storage/buffer/block_manager.cpp +3 -0
  449. package/src/duckdb/src/storage/checkpoint_manager.cpp +3 -0
  450. package/src/duckdb/src/storage/compression/rle.cpp +0 -1
  451. package/src/duckdb/src/storage/data_table.cpp +1 -1
  452. package/src/duckdb/src/storage/local_storage.cpp +3 -3
  453. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +340 -0
  454. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +122 -0
  455. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +86 -0
  456. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +166 -0
  457. package/src/duckdb/src/storage/serialization/serialize_types.cpp +127 -0
  458. package/src/duckdb/src/storage/single_file_block_manager.cpp +23 -0
  459. package/src/duckdb/src/storage/statistics/string_stats.cpp +21 -2
  460. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  461. package/src/duckdb/src/storage/table/chunk_info.cpp +17 -0
  462. package/src/duckdb/src/storage/table/row_group.cpp +25 -9
  463. package/src/duckdb/src/storage/table/row_group_collection.cpp +19 -18
  464. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +2 -2
  465. package/src/duckdb/third_party/concurrentqueue/lightweightsemaphore.h +76 -0
  466. package/src/duckdb/third_party/fast_float/fast_float/fast_float.h +2 -0
  467. package/src/duckdb/third_party/httplib/httplib.hpp +10 -1
  468. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +9 -0
  469. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  470. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12487 -12331
  471. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  472. package/src/duckdb/ub_src_common.cpp +2 -0
  473. package/src/duckdb/ub_src_execution_index_art.cpp +0 -2
  474. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  475. package/src/duckdb/ub_src_execution_operator_set.cpp +2 -0
  476. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  477. package/src/duckdb/ub_src_function_scalar.cpp +2 -0
  478. package/src/duckdb/ub_src_function_scalar_compressed_materialization.cpp +4 -0
  479. package/src/duckdb/ub_src_function_scalar_list.cpp +2 -0
  480. package/src/duckdb/ub_src_optimizer.cpp +6 -0
  481. package/src/duckdb/ub_src_optimizer_compressed_materialization.cpp +6 -0
  482. package/src/duckdb/ub_src_optimizer_statistics_expression.cpp +0 -2
  483. package/src/duckdb/ub_src_parser_query_node.cpp +2 -0
  484. package/src/duckdb/ub_src_parser_transform_expression.cpp +2 -0
  485. package/src/duckdb/ub_src_planner_binder_query_node.cpp +4 -0
  486. package/src/duckdb/ub_src_planner_operator.cpp +4 -0
  487. package/src/duckdb/ub_src_storage_serialization.cpp +10 -0
  488. package/src/statement.cpp +10 -3
  489. package/test/test_all_types.test.ts +233 -0
  490. package/tsconfig.json +1 -0
  491. package/src/duckdb/src/execution/index/art/prefix_segment.cpp +0 -42
  492. package/src/duckdb/src/include/duckdb/execution/index/art/prefix_segment.hpp +0 -40
  493. package/src/duckdb/src/optimizer/statistics/expression/propagate_and_compress.cpp +0 -118
@@ -16,6 +16,7 @@
16
16
 
17
17
  namespace duckdb {
18
18
 
19
+ //! JSON allocator is a custom allocator for yyjson that prevents many tiny allocations
19
20
  class JSONAllocator {
20
21
  public:
21
22
  explicit JSONAllocator(Allocator &allocator)
@@ -50,6 +51,7 @@ private:
50
51
  yyjson_alc yyjson_allocator;
51
52
  };
52
53
 
54
+ //! JSONKey / json_key_map_t speeds up mapping from JSON key to column ID
53
55
  struct JSONKey {
54
56
  const char *ptr;
55
57
  size_t len;
@@ -81,8 +83,10 @@ template <typename T>
81
83
  using json_key_map_t = unordered_map<JSONKey, T, JSONKeyHash, JSONKeyEquality>;
82
84
  using json_key_set_t = unordered_set<JSONKey, JSONKeyHash, JSONKeyEquality>;
83
85
 
86
+ //! Common JSON functionality for most JSON functions
84
87
  struct JSONCommon {
85
88
  public:
89
+ //! The JSON logical type, registered when the extension is loaded
86
90
  static constexpr auto JSON_TYPE_NAME = "JSON";
87
91
 
88
92
  static const LogicalType JSONType() {
@@ -114,9 +118,8 @@ public:
114
118
  static constexpr char const *TYPE_STRING_ARRAY = "ARRAY";
115
119
  static constexpr char const *TYPE_STRING_OBJECT = "OBJECT";
116
120
 
117
- template <class YYJSON_VAL_T>
118
- static inline const char *ValTypeToString(YYJSON_VAL_T *val) {
119
- switch (GetTag<YYJSON_VAL_T>(val)) {
121
+ static inline const char *ValTypeToString(yyjson_val *val) {
122
+ switch (yyjson_get_tag(val)) {
120
123
  case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
121
124
  return JSONCommon::TYPE_STRING_NULL;
122
125
  case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
@@ -139,14 +142,12 @@ public:
139
142
  }
140
143
  }
141
144
 
142
- template <class YYJSON_VAL_T>
143
- static inline constexpr string_t ValTypeToStringT(YYJSON_VAL_T *val) {
144
- return string_t(ValTypeToString<YYJSON_VAL_T>(val));
145
+ static inline string_t ValTypeToStringT(yyjson_val *val) {
146
+ return string_t(ValTypeToString(val));
145
147
  }
146
148
 
147
- template <class YYJSON_VAL_T>
148
- static inline LogicalTypeId ValTypeToLogicalTypeId(YYJSON_VAL_T *val) {
149
- switch (GetTag<YYJSON_VAL_T>(val)) {
149
+ static inline LogicalTypeId ValTypeToLogicalTypeId(yyjson_val *val) {
150
+ switch (yyjson_get_tag(val)) {
150
151
  case YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE:
151
152
  return LogicalTypeId::SQLNULL;
152
153
  case YYJSON_TYPE_STR | YYJSON_SUBTYPE_NONE:
@@ -170,6 +171,9 @@ public:
170
171
  }
171
172
 
172
173
  public:
174
+ //===--------------------------------------------------------------------===//
175
+ // Document creation / reading / writing
176
+ //===--------------------------------------------------------------------===//
173
177
  template <class T>
174
178
  static T *AllocateArray(yyjson_alc *alc, idx_t count) {
175
179
  return reinterpret_cast<T *>(alc->malloc(alc->ctx, sizeof(T) * count));
@@ -204,6 +208,7 @@ public:
204
208
  static inline yyjson_doc *ReadDocument(const string_t &input, const yyjson_read_flag flg, yyjson_alc *alc) {
205
209
  return ReadDocument(input.GetDataWriteable(), input.GetSize(), flg, alc);
206
210
  }
211
+
207
212
  static string FormatParseError(const char *data, idx_t length, yyjson_read_err &error, const string &extra = "") {
208
213
  D_ASSERT(error.code != YYJSON_READ_SUCCESS);
209
214
  // Truncate, so we don't print megabytes worth of JSON
@@ -228,203 +233,77 @@ public:
228
233
  auto data = WriteVal<YYJSON_VAL_T>(val, alc, len);
229
234
  return string_t(data, len);
230
235
  }
236
+
237
+ //! Slow and easy ToString for errors
231
238
  static string ValToString(yyjson_val *val, idx_t max_len = DConstants::INVALID_INDEX);
232
239
  //! Throw an error with the printed yyjson_val
233
240
  static void ThrowValFormatError(string error_string, yyjson_val *val);
234
241
 
235
242
  public:
236
- //! Validate path with $ syntax
237
- static void ValidatePathDollar(const char *ptr, const idx_t &len);
243
+ //===--------------------------------------------------------------------===//
244
+ // JSON pointer / path
245
+ //===--------------------------------------------------------------------===//
246
+ enum class JSONPathType : uint8_t {
247
+ //! Extract a single value
248
+ REGULAR = 0,
249
+ //! Extract multiple values (when we have a '*' wildcard in the JSON Path)
250
+ WILDCARD = 1,
251
+ };
238
252
 
239
253
  //! Get JSON value using JSON path query (safe, checks the path query)
240
- template <class YYJSON_VAL_T>
241
- static inline YYJSON_VAL_T *GetPointer(YYJSON_VAL_T *root, const string_t &path_str) {
254
+ static inline yyjson_val *Get(yyjson_val *val, const string_t &path_str) {
242
255
  auto ptr = path_str.GetData();
243
256
  auto len = path_str.GetSize();
244
257
  if (len == 0) {
245
- return GetPointerUnsafe<YYJSON_VAL_T>(root, ptr, len);
258
+ return GetUnsafe(val, ptr, len);
246
259
  }
247
260
  switch (*ptr) {
248
261
  case '/': {
249
262
  // '/' notation must be '\0'-terminated
250
263
  auto str = string(ptr, len);
251
- return GetPointerUnsafe<YYJSON_VAL_T>(root, str.c_str(), len);
264
+ return GetUnsafe(val, str.c_str(), len);
252
265
  }
253
266
  case '$': {
254
- ValidatePathDollar(ptr, len);
255
- return GetPointerUnsafe<YYJSON_VAL_T>(root, ptr, len);
267
+ if (ValidatePath(ptr, len, false) == JSONPathType::WILDCARD) {
268
+ throw InvalidInputException(
269
+ "JSON path cannot contain wildcards if the path is not a constant parameter");
270
+ }
271
+ return GetUnsafe(val, ptr, len);
256
272
  }
257
273
  default:
258
274
  auto str = "/" + string(ptr, len);
259
- return GetPointerUnsafe<YYJSON_VAL_T>(root, str.c_str(), len + 1);
275
+ return GetUnsafe(val, str.c_str(), len + 1);
260
276
  }
261
277
  }
262
278
 
263
279
  //! Get JSON value using JSON path query (unsafe)
264
- template <class YYJSON_VAL_T>
265
- static inline YYJSON_VAL_T *GetPointerUnsafe(YYJSON_VAL_T *root, const char *ptr, const idx_t &len) {
280
+ static inline yyjson_val *GetUnsafe(yyjson_val *val, const char *ptr, const idx_t &len) {
266
281
  if (len == 0) {
267
282
  return nullptr;
268
283
  }
269
284
  switch (*ptr) {
270
285
  case '/':
271
- return TemplatedGetPointer<YYJSON_VAL_T>(root, ptr, len);
286
+ return GetPointer(val, ptr, len);
272
287
  case '$':
273
- return TemplatedGetPointerDollar<YYJSON_VAL_T>(root, ptr, len);
288
+ return GetPath(val, ptr, len);
274
289
  default:
275
- throw InternalException("JSON path does not start with '/' or '$'");
290
+ throw InternalException("JSON pointer/path does not start with '/' or '$'");
276
291
  }
277
292
  }
278
293
 
279
- private:
280
- //! Get JSON pointer using /field/index/... notation
281
- template <class YYJSON_VAL_T>
282
- static inline YYJSON_VAL_T *TemplatedGetPointer(YYJSON_VAL_T *root, const char *ptr, const idx_t &len) {
283
- throw InternalException("Unknown yyjson value type");
284
- }
285
-
286
- //! Get JSON pointer using $.field[index]... notation
287
- template <class YYJSON_VAL_T>
288
- static YYJSON_VAL_T *TemplatedGetPointerDollar(YYJSON_VAL_T *val, const char *ptr, const idx_t &len) {
289
- if (len == 1) {
290
- // Just '$'
291
- return val;
292
- }
293
- const char *const end = ptr + len;
294
- // Skip past '$'
295
- ptr++;
296
- while (val != nullptr && ptr != end) {
297
- const auto &c = *ptr++;
298
- if (c == '.') {
299
- // Object
300
- if (!IsObj<YYJSON_VAL_T>(val)) {
301
- return nullptr;
302
- }
303
- bool escaped = false;
304
- if (*ptr == '"') {
305
- // Skip past opening '"'
306
- ptr++;
307
- escaped = true;
308
- }
309
- auto key_len = ReadString(ptr, end, escaped);
310
- val = ObjGetN<YYJSON_VAL_T>(val, ptr, key_len);
311
- ptr += key_len;
312
- if (escaped) {
313
- // Skip past closing '"'
314
- ptr++;
315
- }
316
- } else if (c == '[') {
317
- // Array
318
- if (!IsArr<YYJSON_VAL_T>(val)) {
319
- return nullptr;
320
- }
321
- bool from_back = false;
322
- if (*ptr == '#') {
323
- // Index from back of array
324
- ptr++;
325
- if (*ptr == ']') {
326
- return nullptr;
327
- }
328
- from_back = true;
329
- // Skip past '-'
330
- ptr++;
331
- }
332
- // Read index
333
- idx_t idx;
334
- auto idx_len = ReadIndex(ptr, end, idx);
335
- if (from_back) {
336
- auto arr_size = ArrSize<YYJSON_VAL_T>(val);
337
- idx = idx > arr_size ? arr_size : arr_size - idx;
338
- }
339
- val = ArrGet<YYJSON_VAL_T>(val, idx);
340
- ptr += idx_len;
341
- // Skip past closing ']'
342
- ptr++;
343
- } else {
344
- throw InternalException("Unexpected char when parsing JSON path");
345
- }
346
- }
347
- return val;
348
- }
349
-
350
- static inline idx_t ReadString(const char *ptr, const char *const end, const bool escaped) {
351
- const char *const before = ptr;
352
- if (escaped) {
353
- while (ptr != end) {
354
- if (*ptr == '"') {
355
- break;
356
- }
357
- ptr++;
358
- }
359
- return ptr == end ? 0 : ptr - before;
360
- } else {
361
- while (ptr != end) {
362
- if (*ptr == '.' || *ptr == '[') {
363
- break;
364
- }
365
- ptr++;
366
- }
367
- return ptr - before;
368
- }
369
- }
370
-
371
- static constexpr auto IDX_T_SAFE_DIG = 19;
372
- static constexpr auto IDX_T_MAX = ((idx_t)(~(idx_t)0));
294
+ //! Get JSON value using JSON path query (unsafe)
295
+ static void GetWildcardPath(yyjson_val *val, const char *ptr, const idx_t &len, vector<yyjson_val *> &vals);
373
296
 
374
- static inline idx_t ReadIndex(const char *ptr, const char *const end, idx_t &idx) {
375
- const char *const before = ptr;
376
- idx = 0;
377
- for (idx_t i = 0; i < IDX_T_SAFE_DIG; i++) {
378
- if (ptr == end) {
379
- // No closing ']'
380
- return 0;
381
- }
382
- if (*ptr == ']') {
383
- break;
384
- }
385
- uint8_t add = (uint8_t)(*ptr - '0');
386
- if (add <= 9) {
387
- idx = add + idx * 10;
388
- } else {
389
- // Not a digit
390
- return 0;
391
- }
392
- ptr++;
393
- }
394
- // Invalid if overflow
395
- return idx >= (idx_t)IDX_T_MAX ? 0 : ptr - before;
396
- }
297
+ //! Validate JSON Path ($.field[index]... syntax), returns true if there are wildcards in the path
298
+ static JSONPathType ValidatePath(const char *ptr, const idx_t &len, const bool binder);
397
299
 
398
300
  private:
399
- template <class YYJSON_VAL_T>
400
- static inline bool IsObj(YYJSON_VAL_T *val) {
401
- throw InternalException("Unknown yyjson value type");
402
- }
403
-
404
- template <class YYJSON_VAL_T>
405
- static inline YYJSON_VAL_T *ObjGetN(YYJSON_VAL_T *val, const char *ptr, idx_t key_len) {
406
- throw InternalException("Unknown yyjson value type");
407
- }
408
-
409
- template <class YYJSON_VAL_T>
410
- static inline bool IsArr(YYJSON_VAL_T *val) {
411
- throw InternalException("Unknown yyjson value type");
412
- }
413
-
414
- template <class YYJSON_VAL_T>
415
- static inline size_t ArrSize(YYJSON_VAL_T *val) {
416
- throw InternalException("Unknown yyjson value type");
417
- }
418
-
419
- template <class YYJSON_VAL_T>
420
- static inline YYJSON_VAL_T *ArrGet(YYJSON_VAL_T *val, idx_t index) {
421
- throw InternalException("Unknown yyjson value type");
422
- }
423
-
424
- template <class YYJSON_VAL_T>
425
- static inline yyjson_type GetTag(YYJSON_VAL_T *val) {
426
- throw InternalException("Unknown yyjson value type");
301
+ //! Get JSON pointer (/field/index/... syntax)
302
+ static inline yyjson_val *GetPointer(yyjson_val *val, const char *ptr, const idx_t &len) {
303
+ return len == 1 ? val : unsafe_yyjson_get_pointer(val, ptr, len);
427
304
  }
305
+ //! Get JSON path ($.field[index]... syntax)
306
+ static yyjson_val *GetPath(yyjson_val *val, const char *ptr, const idx_t &len);
428
307
  };
429
308
 
430
309
  template <>
@@ -436,67 +315,4 @@ inline char *JSONCommon::WriteVal(yyjson_mut_val *val, yyjson_alc *alc, idx_t &l
436
315
  return yyjson_mut_val_write_opts(val, JSONCommon::WRITE_FLAG, alc, reinterpret_cast<size_t *>(&len), nullptr);
437
316
  }
438
317
 
439
- template <>
440
- inline yyjson_val *JSONCommon::TemplatedGetPointer(yyjson_val *root, const char *ptr, const idx_t &len) {
441
- return len == 1 ? root : unsafe_yyjson_get_pointer(root, ptr, len);
442
- }
443
- template <>
444
- inline yyjson_mut_val *JSONCommon::TemplatedGetPointer(yyjson_mut_val *root, const char *ptr, const idx_t &len) {
445
- return len == 1 ? root : unsafe_yyjson_mut_get_pointer(root, ptr, len);
446
- }
447
-
448
- template <>
449
- inline bool JSONCommon::IsObj(yyjson_val *val) {
450
- return yyjson_is_obj(val);
451
- }
452
- template <>
453
- inline bool JSONCommon::IsObj(yyjson_mut_val *val) {
454
- return yyjson_mut_is_obj(val);
455
- }
456
-
457
- template <>
458
- inline yyjson_val *JSONCommon::ObjGetN(yyjson_val *val, const char *ptr, idx_t key_len) {
459
- return yyjson_obj_getn(val, ptr, key_len);
460
- }
461
- template <>
462
- inline yyjson_mut_val *JSONCommon::ObjGetN(yyjson_mut_val *val, const char *ptr, idx_t key_len) {
463
- return yyjson_mut_obj_getn(val, ptr, key_len);
464
- }
465
-
466
- template <>
467
- inline bool JSONCommon::IsArr(yyjson_val *val) {
468
- return yyjson_is_arr(val);
469
- }
470
- template <>
471
- inline bool JSONCommon::IsArr(yyjson_mut_val *val) {
472
- return yyjson_mut_is_arr(val);
473
- }
474
-
475
- template <>
476
- inline size_t JSONCommon::ArrSize(yyjson_val *val) {
477
- return yyjson_arr_size(val);
478
- }
479
- template <>
480
- inline size_t JSONCommon::ArrSize(yyjson_mut_val *val) {
481
- return yyjson_mut_arr_size(val);
482
- }
483
-
484
- template <>
485
- inline yyjson_val *JSONCommon::ArrGet(yyjson_val *val, idx_t index) {
486
- return yyjson_arr_get(val, index);
487
- }
488
- template <>
489
- inline yyjson_mut_val *JSONCommon::ArrGet(yyjson_mut_val *val, idx_t index) {
490
- return yyjson_mut_arr_get(val, index);
491
- }
492
-
493
- template <>
494
- inline yyjson_type JSONCommon::GetTag(yyjson_val *val) {
495
- return yyjson_get_tag(val);
496
- }
497
- template <>
498
- inline yyjson_type JSONCommon::GetTag(yyjson_mut_val *val) {
499
- return yyjson_mut_get_tag(val);
500
- }
501
-
502
318
  } // namespace duckdb
@@ -42,23 +42,59 @@ public:
42
42
  if (info.constant) { // Constant path
43
43
  const char *ptr = info.ptr;
44
44
  const idx_t &len = info.len;
45
- UnaryExecutor::ExecuteWithNulls<string_t, T>(
46
- inputs, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) {
47
- auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
48
- auto val = JSONCommon::GetPointerUnsafe<yyjson_val>(doc->root, ptr, len);
49
- if (!val || unsafe_yyjson_is_null(val)) {
50
- mask.SetInvalid(idx);
51
- return T {};
52
- } else {
53
- return fun(val, alc, result);
54
- }
55
- });
45
+ if (info.path_type == JSONCommon::JSONPathType::REGULAR) {
46
+ UnaryExecutor::ExecuteWithNulls<string_t, T>(
47
+ inputs, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) {
48
+ auto doc =
49
+ JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
50
+ auto val = JSONCommon::GetUnsafe(doc->root, ptr, len);
51
+ if (!val || unsafe_yyjson_is_null(val)) {
52
+ mask.SetInvalid(idx);
53
+ return T {};
54
+ } else {
55
+ return fun(val, alc, result);
56
+ }
57
+ });
58
+ } else {
59
+ D_ASSERT(info.path_type == JSONCommon::JSONPathType::WILDCARD);
60
+ vector<yyjson_val *> vals;
61
+ UnaryExecutor::Execute<string_t, list_entry_t>(inputs, result, args.size(), [&](string_t input) {
62
+ vals.clear();
63
+
64
+ auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
65
+ JSONCommon::GetWildcardPath(doc->root, ptr, len, vals);
66
+
67
+ auto current_size = ListVector::GetListSize(result);
68
+ auto new_size = current_size + vals.size();
69
+ if (ListVector::GetListCapacity(result) < new_size) {
70
+ ListVector::Reserve(result, new_size);
71
+ }
72
+
73
+ auto &child_entry = ListVector::GetEntry(result);
74
+ auto child_vals = FlatVector::GetData<T>(child_entry);
75
+ auto &child_validity = FlatVector::Validity(child_entry);
76
+ for (idx_t i = 0; i < vals.size(); i++) {
77
+ auto &val = vals[i];
78
+ D_ASSERT(val != nullptr); // Wildcard extract shouldn't give back nullptrs
79
+ if (unsafe_yyjson_is_null(val)) {
80
+ child_validity.SetInvalid(current_size + i);
81
+ } else {
82
+ child_vals[current_size + i] = fun(val, alc, result);
83
+ }
84
+ }
85
+
86
+ ListVector::SetListSize(result, new_size);
87
+
88
+ return list_entry_t {current_size, vals.size()};
89
+ });
90
+ }
56
91
  } else { // Columnref path
92
+ D_ASSERT(info.path_type == JSONCommon::JSONPathType::REGULAR);
57
93
  auto &paths = args.data[1];
58
94
  BinaryExecutor::ExecuteWithNulls<string_t, string_t, T>(
59
95
  inputs, paths, result, args.size(), [&](string_t input, string_t path, ValidityMask &mask, idx_t idx) {
60
96
  auto doc = JSONCommon::ReadDocument(input, JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
61
- auto val = JSONCommon::GetPointer<yyjson_val>(doc->root, path);
97
+ auto val = JSONCommon::Get(doc->root, path);
62
98
  if (!val || unsafe_yyjson_is_null(val)) {
63
99
  mask.SetInvalid(idx);
64
100
  return T {};
@@ -111,7 +147,7 @@ public:
111
147
  auto doc = JSONCommon::ReadDocument(inputs[idx], JSONCommon::READ_FLAG, lstate.json_allocator.GetYYAlc());
112
148
  for (idx_t path_i = 0; path_i < num_paths; path_i++) {
113
149
  auto child_idx = offset + path_i;
114
- val = JSONCommon::GetPointerUnsafe<yyjson_val>(doc->root, info.ptrs[path_i], info.lens[path_i]);
150
+ val = JSONCommon::GetUnsafe(doc->root, info.ptrs[path_i], info.lens[path_i]);
115
151
  if (!val || unsafe_yyjson_is_null(val)) {
116
152
  child_validity.SetInvalid(child_idx);
117
153
  } else {
@@ -24,7 +24,7 @@ class BuiltinFunctions;
24
24
  // Scalar function stuff
25
25
  struct JSONReadFunctionData : public FunctionData {
26
26
  public:
27
- JSONReadFunctionData(bool constant, string path_p, idx_t len);
27
+ JSONReadFunctionData(bool constant, string path_p, idx_t len, JSONCommon::JSONPathType path_type);
28
28
  unique_ptr<FunctionData> Copy() const override;
29
29
  bool Equals(const FunctionData &other_p) const override;
30
30
  static unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
@@ -33,6 +33,7 @@ public:
33
33
  public:
34
34
  const bool constant;
35
35
  const string path;
36
+ const JSONCommon::JSONPathType path_type;
36
37
  const char *ptr;
37
38
  const size_t len;
38
39
  };