duckdb 0.7.2-dev2320.0 → 0.7.2-dev2410.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (200)
  1. package/package.json +1 -1
  2. package/src/data_chunk.cpp +1 -1
  3. package/src/duckdb/extension/icu/icu-extension.cpp +2 -2
  4. package/src/duckdb/extension/icu/icu-makedate.cpp +52 -0
  5. package/src/duckdb/extension/icu/icu-strptime.cpp +1 -1
  6. package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +4 -0
  7. package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.cpp +28 -28
  8. package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.h +4 -4
  9. package/src/duckdb/extension/json/include/json_common.hpp +1 -1
  10. package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
  11. package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
  12. package/src/duckdb/extension/json/json_functions.cpp +2 -2
  13. package/src/duckdb/extension/json/json_serializer.cpp +1 -1
  14. package/src/duckdb/extension/parquet/column_reader.cpp +1 -1
  15. package/src/duckdb/extension/parquet/column_writer.cpp +3 -3
  16. package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +2 -2
  17. package/src/duckdb/src/common/arrow/arrow_appender.cpp +2 -2
  18. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  19. package/src/duckdb/src/common/file_buffer.cpp +8 -0
  20. package/src/duckdb/src/common/operator/cast_operators.cpp +24 -25
  21. package/src/duckdb/src/common/radix_partitioning.cpp +34 -0
  22. package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +2 -2
  23. package/src/duckdb/src/common/row_operations/row_scatter.cpp +1 -1
  24. package/src/duckdb/src/common/sort/partition_state.cpp +44 -124
  25. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  26. package/src/duckdb/src/common/types/bit.cpp +18 -18
  27. package/src/duckdb/src/common/types/blob.cpp +7 -7
  28. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +1 -1
  29. package/src/duckdb/src/common/types/column/column_data_collection.cpp +1 -1
  30. package/src/duckdb/src/common/types/hash.cpp +1 -1
  31. package/src/duckdb/src/common/types/hyperloglog.cpp +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  33. package/src/duckdb/src/common/types/string_heap.cpp +2 -2
  34. package/src/duckdb/src/common/types/string_type.cpp +2 -2
  35. package/src/duckdb/src/common/types/timestamp.cpp +1 -1
  36. package/src/duckdb/src/common/types/vector.cpp +7 -7
  37. package/src/duckdb/src/execution/index/art/art_key.cpp +2 -2
  38. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +144 -31
  39. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +698 -0
  40. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +1 -1
  41. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  42. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -1
  43. package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -2
  44. package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +2 -2
  45. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +2 -2
  46. package/src/duckdb/src/function/aggregate/distributive/first.cpp +2 -2
  47. package/src/duckdb/src/function/aggregate/distributive/kurtosis.cpp +3 -2
  48. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -2
  49. package/src/duckdb/src/function/aggregate/distributive/skew.cpp +5 -1
  50. package/src/duckdb/src/function/aggregate/distributive/string_agg.cpp +1 -1
  51. package/src/duckdb/src/function/cast/list_casts.cpp +1 -1
  52. package/src/duckdb/src/function/cast/struct_cast.cpp +1 -1
  53. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -3
  54. package/src/duckdb/src/function/scalar/bit/bitstring.cpp +1 -1
  55. package/src/duckdb/src/function/scalar/blob/encode.cpp +1 -1
  56. package/src/duckdb/src/function/scalar/date/strftime.cpp +3 -3
  57. package/src/duckdb/src/function/scalar/generic/current_setting.cpp +1 -1
  58. package/src/duckdb/src/function/scalar/list/list_sort.cpp +30 -56
  59. package/src/duckdb/src/function/scalar/string/ascii.cpp +1 -1
  60. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -2
  61. package/src/duckdb/src/function/scalar/string/concat.cpp +6 -6
  62. package/src/duckdb/src/function/scalar/string/contains.cpp +2 -2
  63. package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +2 -2
  64. package/src/duckdb/src/function/scalar/string/hex.cpp +4 -4
  65. package/src/duckdb/src/function/scalar/string/instr.cpp +1 -1
  66. package/src/duckdb/src/function/scalar/string/jaccard.cpp +1 -1
  67. package/src/duckdb/src/function/scalar/string/jaro_winkler.cpp +5 -5
  68. package/src/duckdb/src/function/scalar/string/length.cpp +1 -1
  69. package/src/duckdb/src/function/scalar/string/levenshtein.cpp +2 -2
  70. package/src/duckdb/src/function/scalar/string/like.cpp +10 -11
  71. package/src/duckdb/src/function/scalar/string/mismatches.cpp +2 -2
  72. package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +1 -1
  73. package/src/duckdb/src/function/scalar/string/pad.cpp +3 -3
  74. package/src/duckdb/src/function/scalar/string/prefix.cpp +2 -2
  75. package/src/duckdb/src/function/scalar/string/printf.cpp +1 -1
  76. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +4 -4
  77. package/src/duckdb/src/function/scalar/string/repeat.cpp +1 -1
  78. package/src/duckdb/src/function/scalar/string/replace.cpp +3 -3
  79. package/src/duckdb/src/function/scalar/string/reverse.cpp +1 -1
  80. package/src/duckdb/src/function/scalar/string/starts_with.cpp +2 -2
  81. package/src/duckdb/src/function/scalar/string/string_split.cpp +3 -3
  82. package/src/duckdb/src/function/scalar/string/strip_accents.cpp +2 -2
  83. package/src/duckdb/src/function/scalar/string/substring.cpp +3 -3
  84. package/src/duckdb/src/function/scalar/string/suffix.cpp +2 -2
  85. package/src/duckdb/src/function/scalar/string/translate.cpp +3 -3
  86. package/src/duckdb/src/function/scalar/string/trim.cpp +3 -3
  87. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +1 -1
  88. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -7
  89. package/src/duckdb/src/function/scalar/union/union_extract.cpp +1 -1
  90. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  91. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +2 -2
  92. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  93. package/src/duckdb/src/include/duckdb/common/crypto/md5.hpp +1 -1
  94. package/src/duckdb/src/include/duckdb/common/enums/debug_initialize.hpp +17 -0
  95. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +8 -0
  96. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  97. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +3 -0
  98. package/src/duckdb/src/include/duckdb/common/radix.hpp +1 -1
  99. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +3 -0
  100. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -60
  101. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +8 -6
  102. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +1 -1
  103. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +6 -1
  104. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +93 -0
  105. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +1 -1
  106. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +17 -0
  107. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +1 -1
  108. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -2
  109. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
  110. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
  111. package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +1 -1
  112. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  113. package/src/duckdb/src/include/duckdb/main/config.hpp +7 -2
  114. package/src/duckdb/src/include/duckdb/main/settings.hpp +13 -3
  115. package/src/duckdb/src/include/duckdb/optimizer/cse_optimizer.hpp +1 -1
  116. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -2
  117. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
  118. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
  119. package/src/duckdb/src/include/duckdb/planner/expression_binder/aggregate_binder.hpp +1 -1
  120. package/src/duckdb/src/include/duckdb/planner/expression_binder/alter_binder.hpp +1 -1
  121. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +4 -3
  122. package/src/duckdb/src/include/duckdb/planner/expression_binder/check_binder.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/planner/expression_binder/constant_binder.hpp +1 -1
  124. package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -1
  125. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +1 -1
  127. package/src/duckdb/src/include/duckdb/planner/expression_binder/insert_binder.hpp +1 -1
  128. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +2 -2
  129. package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +2 -2
  130. package/src/duckdb/src/include/duckdb/planner/expression_binder/relation_binder.hpp +1 -1
  131. package/src/duckdb/src/include/duckdb/planner/expression_binder/returning_binder.hpp +1 -1
  132. package/src/duckdb/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +1 -1
  133. package/src/duckdb/src/include/duckdb/planner/expression_binder/update_binder.hpp +1 -1
  134. package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +2 -2
  135. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +12 -9
  136. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +1 -0
  137. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +3 -0
  138. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -1
  139. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +11 -5
  140. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
  141. package/src/duckdb/src/main/capi/cast/from_decimal-c.cpp +1 -1
  142. package/src/duckdb/src/main/capi/result-c.cpp +2 -2
  143. package/src/duckdb/src/main/config.cpp +26 -0
  144. package/src/duckdb/src/main/settings/settings.cpp +31 -8
  145. package/src/duckdb/src/optimizer/cse_optimizer.cpp +9 -8
  146. package/src/duckdb/src/parser/expression/subquery_expression.cpp +1 -1
  147. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
  148. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +33 -29
  149. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +8 -10
  150. package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
  151. package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +2 -2
  152. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +1 -1
  153. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +8 -7
  154. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +2 -2
  155. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +6 -6
  156. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +2 -2
  157. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
  158. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +6 -14
  159. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +2 -5
  160. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +1 -1
  161. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +8 -8
  162. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +5 -5
  163. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -2
  164. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  165. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +2 -2
  166. package/src/duckdb/src/planner/binder/tableref/plan_expressionlistref.cpp +1 -1
  167. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +4 -4
  168. package/src/duckdb/src/planner/expression.cpp +2 -1
  169. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +2 -2
  170. package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
  171. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
  172. package/src/duckdb/src/planner/expression_binder/check_binder.cpp +4 -4
  173. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +1 -1
  174. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
  175. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
  176. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +4 -4
  177. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
  178. package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
  179. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
  180. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +4 -4
  181. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +2 -2
  182. package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +2 -2
  183. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +3 -3
  184. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +2 -2
  185. package/src/duckdb/src/planner/expression_binder/where_binder.cpp +4 -4
  186. package/src/duckdb/src/planner/expression_binder.cpp +12 -12
  187. package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -2
  188. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +2 -2
  189. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
  190. package/src/duckdb/src/storage/compression/fsst.cpp +3 -3
  191. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +1 -1
  192. package/src/duckdb/src/storage/meta_block_writer.cpp +4 -0
  193. package/src/duckdb/src/storage/partial_block_manager.cpp +11 -4
  194. package/src/duckdb/src/storage/single_file_block_manager.cpp +16 -9
  195. package/src/duckdb/src/storage/standard_buffer_manager.cpp +5 -2
  196. package/src/duckdb/src/storage/statistics/string_stats.cpp +2 -2
  197. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  198. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +21 -1
  199. package/src/duckdb/ub_src_execution_operator_join.cpp +2 -0
  200. package/src/statement.cpp +3 -3
@@ -18,15 +18,24 @@ PartitionGlobalHashGroup::PartitionGlobalHashGroup(BufferManager &buffer_manager
18
18
  global_sort = make_uniq<GlobalSortState>(buffer_manager, orders, payload_layout);
19
19
  global_sort->external = external;
20
20
 
21
+ // Set up a comparator for the partition subset
21
22
  partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
22
23
  }
23
24
 
25
+ int PartitionGlobalHashGroup::ComparePartitions(const SBIterator &left, const SBIterator &right) const {
26
+ int part_cmp = 0;
27
+ if (partition_layout.all_constant) {
28
+ part_cmp = FastMemcmp(left.entry_ptr, right.entry_ptr, partition_layout.comparison_size);
29
+ } else {
30
+ part_cmp = Comparators::CompareTuple(left.scan, right.scan, left.entry_ptr, right.entry_ptr, partition_layout,
31
+ left.external);
32
+ }
33
+ return part_cmp;
34
+ }
35
+
24
36
  void PartitionGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask) {
25
37
  D_ASSERT(count > 0);
26
38
 
27
- // Set up a comparator for the partition subset
28
- const auto partition_size = partition_layout.comparison_size;
29
-
30
39
  SBIterator prev(*global_sort, ExpressionType::COMPARE_LESSTHAN);
31
40
  SBIterator curr(*global_sort, ExpressionType::COMPARE_LESSTHAN);
32
41
 
@@ -34,13 +43,8 @@ void PartitionGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, Validi
34
43
  order_mask.SetValidUnsafe(0);
35
44
  for (++curr; curr.GetIndex() < count; ++curr) {
36
45
  // Compare the partition subset first because if that differs, then so does the full ordering
37
- int part_cmp = 0;
38
- if (partition_layout.all_constant) {
39
- part_cmp = FastMemcmp(prev.entry_ptr, curr.entry_ptr, partition_size);
40
- } else {
41
- part_cmp = Comparators::CompareTuple(prev.scan, curr.scan, prev.entry_ptr, curr.entry_ptr, partition_layout,
42
- prev.external);
43
- }
46
+ const auto part_cmp = ComparePartitions(prev, curr);
47
+ ;
44
48
 
45
49
  if (part_cmp) {
46
50
  partition_mask.SetValidUnsafe(curr.GetIndex());
@@ -52,31 +56,40 @@ void PartitionGlobalHashGroup::ComputeMasks(ValidityMask &partition_mask, Validi
52
56
  }
53
57
  }
54
58
 
55
- PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
56
- const vector<unique_ptr<Expression>> &partitions_p,
57
- const vector<BoundOrderByNode> &orders_p, const Types &payload_types,
58
- const vector<unique_ptr<BaseStatistics>> &partitions_stats,
59
- idx_t estimated_cardinality)
60
- : context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
61
- payload_types(payload_types), memory_per_thread(0), count(0) {
59
+ void PartitionGlobalSinkState::GenerateOrderings(Orders &partitions, Orders &orders,
60
+ const vector<unique_ptr<Expression>> &partition_bys,
61
+ const Orders &order_bys,
62
+ const vector<unique_ptr<BaseStatistics>> &partition_stats) {
62
63
 
63
64
  // we sort by both 1) partition by expression list and 2) order by expressions
64
- const auto partition_cols = partitions_p.size();
65
+ const auto partition_cols = partition_bys.size();
65
66
  for (idx_t prt_idx = 0; prt_idx < partition_cols; prt_idx++) {
66
- auto &pexpr = partitions_p[prt_idx];
67
+ auto &pexpr = partition_bys[prt_idx];
67
68
 
68
- if (partitions_stats.empty() || !partitions_stats[prt_idx]) {
69
+ if (partition_stats.empty() || !partition_stats[prt_idx]) {
69
70
  orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(), nullptr);
70
71
  } else {
71
72
  orders.emplace_back(OrderType::ASCENDING, OrderByNullType::NULLS_FIRST, pexpr->Copy(),
72
- partitions_stats[prt_idx]->ToUnique());
73
+ partition_stats[prt_idx]->ToUnique());
73
74
  }
74
75
  partitions.emplace_back(orders.back().Copy());
75
76
  }
76
77
 
77
- for (const auto &order : orders_p) {
78
+ for (const auto &order : order_bys) {
78
79
  orders.emplace_back(order.Copy());
79
80
  }
81
+ }
82
+
83
+ PartitionGlobalSinkState::PartitionGlobalSinkState(ClientContext &context,
84
+ const vector<unique_ptr<Expression>> &partition_bys,
85
+ const vector<BoundOrderByNode> &order_bys,
86
+ const Types &payload_types,
87
+ const vector<unique_ptr<BaseStatistics>> &partition_stats,
88
+ idx_t estimated_cardinality)
89
+ : context(context), buffer_manager(BufferManager::GetBufferManager(context)), allocator(Allocator::Get(context)),
90
+ payload_types(payload_types), memory_per_thread(0), count(0) {
91
+
92
+ GenerateOrderings(partitions, orders, partition_bys, order_bys, partition_stats);
80
93
 
81
94
  memory_per_thread = PhysicalOperator::GetMaxThreadMemory(context);
82
95
  external = ClientConfig::GetConfig(context).force_external;
@@ -337,7 +350,8 @@ void PartitionLocalSinkState::Combine() {
337
350
  gstate.CombineLocalPartition(local_partition, local_append);
338
351
  }
339
352
 
340
- PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data)
353
+ PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data,
354
+ hash_t hash_bin)
341
355
  : sink(sink), group_data(std::move(group_data)), stage(PartitionSortStage::INIT), total_tasks(0), tasks_assigned(0),
342
356
  tasks_completed(0) {
343
357
 
@@ -348,6 +362,8 @@ PartitionGlobalMergeState::PartitionGlobalMergeState(PartitionGlobalSinkState &s
348
362
 
349
363
  hash_group = sink.hash_groups[group_idx].get();
350
364
  global_sort = sink.hash_groups[group_idx]->global_sort.get();
365
+
366
+ sink.bin_groups[hash_bin] = group_idx;
351
367
  }
352
368
 
353
369
  void PartitionLocalMergeState::Prepare() {
@@ -445,10 +461,13 @@ bool PartitionGlobalMergeState::TryPrepareNextStage() {
445
461
 
446
462
  PartitionGlobalMergeStates::PartitionGlobalMergeStates(PartitionGlobalSinkState &sink) {
447
463
  // Schedule all the sorts for maximum thread utilisation
448
- for (auto &group_data : sink.grouping_data->GetPartitions()) {
464
+ auto &partitions = sink.grouping_data->GetPartitions();
465
+ sink.bin_groups.resize(partitions.size(), partitions.size());
466
+ for (hash_t hash_bin = 0; hash_bin < partitions.size(); ++hash_bin) {
467
+ auto &group_data = partitions[hash_bin];
449
468
  // Prepare for merge sort phase
450
469
  if (group_data->Count()) {
451
- auto state = make_uniq<PartitionGlobalMergeState>(sink, std::move(group_data));
470
+ auto state = make_uniq<PartitionGlobalMergeState>(sink, std::move(group_data), hash_bin);
452
471
  states.emplace_back(std::move(state));
453
472
  }
454
473
  }
@@ -542,103 +561,4 @@ void PartitionMergeEvent::Schedule() {
542
561
  SetTasks(std::move(merge_tasks));
543
562
  }
544
563
 
545
- PartitionLocalSourceState::PartitionLocalSourceState(PartitionGlobalSinkState &gstate_p) : gstate(gstate_p) {
546
- const auto &input_types = gstate.payload_types;
547
- layout.Initialize(input_types);
548
- input_chunk.Initialize(gstate.allocator, input_types);
549
- }
550
-
551
- void PartitionLocalSourceState::MaterializeSortedData() {
552
- auto &global_sort_state = *hash_group->global_sort;
553
- if (global_sort_state.sorted_blocks.empty()) {
554
- return;
555
- }
556
-
557
- // scan the sorted row data
558
- D_ASSERT(global_sort_state.sorted_blocks.size() == 1);
559
- auto &sb = *global_sort_state.sorted_blocks[0];
560
-
561
- // Free up some memory before allocating more
562
- sb.radix_sorting_data.clear();
563
- sb.blob_sorting_data = nullptr;
564
-
565
- // Move the sorting row blocks into our RDCs
566
- auto &buffer_manager = global_sort_state.buffer_manager;
567
- auto &sd = *sb.payload_data;
568
-
569
- // Data blocks are required
570
- D_ASSERT(!sd.data_blocks.empty());
571
- auto &block = sd.data_blocks[0];
572
- rows = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
573
- rows->blocks = std::move(sd.data_blocks);
574
- rows->count = std::accumulate(rows->blocks.begin(), rows->blocks.end(), idx_t(0),
575
- [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
576
-
577
- // Heap blocks are optional, but we want both for iteration.
578
- if (!sd.heap_blocks.empty()) {
579
- auto &block = sd.heap_blocks[0];
580
- heap = make_uniq<RowDataCollection>(buffer_manager, block->capacity, block->entry_size);
581
- heap->blocks = std::move(sd.heap_blocks);
582
- hash_group.reset();
583
- } else {
584
- heap = make_uniq<RowDataCollection>(buffer_manager, (idx_t)Storage::BLOCK_SIZE, 1, true);
585
- }
586
- heap->count = std::accumulate(heap->blocks.begin(), heap->blocks.end(), idx_t(0),
587
- [&](idx_t c, const unique_ptr<RowDataBlock> &b) { return c + b->count; });
588
- }
589
-
590
- idx_t PartitionLocalSourceState::GeneratePartition(const idx_t hash_bin_p) {
591
- // Get rid of any stale data
592
- hash_bin = hash_bin_p;
593
-
594
- // There are three types of partitions:
595
- // 1. No partition (no sorting)
596
- // 2. One partition (sorting, but no hashing)
597
- // 3. Multiple partitions (sorting and hashing)
598
-
599
- // How big is the partition?
600
- idx_t count = 0;
601
- if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
602
- count = gstate.hash_groups[hash_bin]->count;
603
- } else if (gstate.rows && !hash_bin) {
604
- count = gstate.count;
605
- } else {
606
- return count;
607
- }
608
-
609
- // Initialise masks to false
610
- const auto bit_count = ValidityMask::ValidityMaskSize(count);
611
- partition_bits.clear();
612
- partition_bits.resize(bit_count, 0);
613
- partition_mask.Initialize(partition_bits.data());
614
-
615
- order_bits.clear();
616
- order_bits.resize(bit_count, 0);
617
- order_mask.Initialize(order_bits.data());
618
-
619
- // Scan the sorted data into new Collections
620
- auto external = gstate.external;
621
- if (gstate.rows && !hash_bin) {
622
- // Simple mask
623
- partition_mask.SetValidUnsafe(0);
624
- order_mask.SetValidUnsafe(0);
625
- // No partition - align the heap blocks with the row blocks
626
- rows = gstate.rows->CloneEmpty(gstate.rows->keep_pinned);
627
- heap = gstate.strings->CloneEmpty(gstate.strings->keep_pinned);
628
- RowDataCollectionScanner::AlignHeapBlocks(*rows, *heap, *gstate.rows, *gstate.strings, layout);
629
- external = true;
630
- } else if (hash_bin < gstate.hash_groups.size() && gstate.hash_groups[hash_bin]) {
631
- // Overwrite the collections with the sorted data
632
- hash_group = std::move(gstate.hash_groups[hash_bin]);
633
- hash_group->ComputeMasks(partition_mask, order_mask);
634
- MaterializeSortedData();
635
- } else {
636
- return count;
637
- }
638
-
639
- scanner = make_uniq<RowDataCollectionScanner>(*rows, *heap, layout, external, false);
640
-
641
- return count;
642
- }
643
-
644
564
  } // namespace duckdb
@@ -367,7 +367,7 @@ int SBIterator::ComparisonValue(ExpressionType comparison) {
367
367
  }
368
368
 
369
369
  static idx_t GetBlockCountWithEmptyCheck(const GlobalSortState &gss) {
370
- D_ASSERT(gss.sorted_blocks.size() > 0);
370
+ D_ASSERT(!gss.sorted_blocks.empty());
371
371
  return gss.sorted_blocks[0]->radix_sorting_data.size();
372
372
  }
373
373
 
@@ -20,7 +20,7 @@ idx_t Bit::ComputeBitstringLen(idx_t len) {
20
20
  }
21
21
 
22
22
  static inline idx_t GetBitPadding(const string_t &bit_string) {
23
- auto data = (const_data_ptr_t)bit_string.GetDataUnsafe();
23
+ auto data = (const_data_ptr_t)bit_string.GetData();
24
24
  D_ASSERT(idx_t(data[0]) <= 8);
25
25
  return data[0];
26
26
  }
@@ -46,7 +46,7 @@ void Bit::Finalize(string_t &str) {
46
46
 
47
47
  void Bit::SetEmptyBitString(string_t &target, string_t &input) {
48
48
  char *res_buf = target.GetDataWriteable();
49
- const char *buf = input.GetDataUnsafe();
49
+ const char *buf = input.GetData();
50
50
  memset(res_buf, 0, input.GetSize());
51
51
  res_buf[0] = buf[0];
52
52
  Bit::Finalize(target);
@@ -61,7 +61,7 @@ void Bit::SetEmptyBitString(string_t &target, idx_t len) {
61
61
 
62
62
  // **** casting functions ****
63
63
  void Bit::ToString(string_t bits, char *output) {
64
- auto data = (const_data_ptr_t)bits.GetDataUnsafe();
64
+ auto data = (const_data_ptr_t)bits.GetData();
65
65
  auto len = bits.GetSize();
66
66
 
67
67
  idx_t padding = GetBitPadding(bits);
@@ -84,7 +84,7 @@ string Bit::ToString(string_t str) {
84
84
  }
85
85
 
86
86
  bool Bit::TryGetBitStringSize(string_t str, idx_t &str_len, string *error_message) {
87
- auto data = (const_data_ptr_t)str.GetDataUnsafe();
87
+ auto data = (const_data_ptr_t)str.GetData();
88
88
  auto len = str.GetSize();
89
89
  str_len = 0;
90
90
  for (idx_t i = 0; i < len; i++) {
@@ -107,7 +107,7 @@ bool Bit::TryGetBitStringSize(string_t str, idx_t &str_len, string *error_messag
107
107
  }
108
108
 
109
109
  void Bit::ToBit(string_t str, string_t &output_str) {
110
- auto data = (const_data_ptr_t)str.GetDataUnsafe();
110
+ auto data = (const_data_ptr_t)str.GetData();
111
111
  auto len = str.GetSize();
112
112
  auto output = output_str.GetDataWriteable();
113
113
 
@@ -149,7 +149,7 @@ string Bit::ToBit(string_t str) {
149
149
  // **** scalar functions ****
150
150
  void Bit::BitString(const string_t &input, const idx_t &bit_length, string_t &result) {
151
151
  char *res_buf = result.GetDataWriteable();
152
- const char *buf = input.GetDataUnsafe();
152
+ const char *buf = input.GetData();
153
153
 
154
154
  auto padding = ComputePadding(bit_length);
155
155
  res_buf[0] = padding;
@@ -174,7 +174,7 @@ idx_t Bit::OctetLength(string_t bits) {
174
174
 
175
175
  idx_t Bit::BitCount(string_t bits) {
176
176
  idx_t count = 0;
177
- const char *buf = bits.GetDataUnsafe();
177
+ const char *buf = bits.GetData();
178
178
  for (idx_t byte_idx = 1; byte_idx < OctetLength(bits) + 1; byte_idx++) {
179
179
  for (idx_t bit_idx = 0; bit_idx < 8; bit_idx++) {
180
180
  count += (buf[byte_idx] & (1 << bit_idx)) ? 1 : 0;
@@ -184,7 +184,7 @@ idx_t Bit::BitCount(string_t bits) {
184
184
  }
185
185
 
186
186
  idx_t Bit::BitPosition(string_t substring, string_t bits) {
187
- const char *buf = bits.GetDataUnsafe();
187
+ const char *buf = bits.GetData();
188
188
  auto len = bits.GetSize();
189
189
  auto substr_len = BitLength(substring);
190
190
  idx_t substr_idx = 0;
@@ -226,7 +226,7 @@ idx_t Bit::GetBitIndex(idx_t n) {
226
226
  }
227
227
 
228
228
  idx_t Bit::GetBitInternal(string_t bit_string, idx_t n) {
229
- const char *buf = bit_string.GetDataUnsafe();
229
+ const char *buf = bit_string.GetData();
230
230
  auto idx = Bit::GetBitIndex(n);
231
231
  D_ASSERT(idx < bit_string.GetSize());
232
232
  char byte = buf[idx] >> (7 - (n % 8));
@@ -254,7 +254,7 @@ void Bit::SetBitInternal(string_t &bit_string, idx_t n, idx_t new_value) {
254
254
  // **** BITWISE operators ****
255
255
  void Bit::RightShift(const string_t &bit_string, const idx_t &shift, string_t &result) {
256
256
  char *res_buf = result.GetDataWriteable();
257
- const char *buf = bit_string.GetDataUnsafe();
257
+ const char *buf = bit_string.GetData();
258
258
  res_buf[0] = buf[0];
259
259
  for (idx_t i = 0; i < Bit::BitLength(result); i++) {
260
260
  if (i < shift) {
@@ -269,7 +269,7 @@ void Bit::RightShift(const string_t &bit_string, const idx_t &shift, string_t &r
269
269
 
270
270
  void Bit::LeftShift(const string_t &bit_string, const idx_t &shift, string_t &result) {
271
271
  char *res_buf = result.GetDataWriteable();
272
- const char *buf = bit_string.GetDataUnsafe();
272
+ const char *buf = bit_string.GetData();
273
273
  res_buf[0] = buf[0];
274
274
  for (idx_t i = 0; i < Bit::BitLength(bit_string); i++) {
275
275
  if (i < (Bit::BitLength(bit_string) - shift)) {
@@ -289,8 +289,8 @@ void Bit::BitwiseAnd(const string_t &rhs, const string_t &lhs, string_t &result)
289
289
  }
290
290
 
291
291
  char *buf = result.GetDataWriteable();
292
- const char *r_buf = rhs.GetDataUnsafe();
293
- const char *l_buf = lhs.GetDataUnsafe();
292
+ const char *r_buf = rhs.GetData();
293
+ const char *l_buf = lhs.GetData();
294
294
 
295
295
  buf[0] = l_buf[0];
296
296
  for (idx_t i = 1; i < lhs.GetSize(); i++) {
@@ -306,8 +306,8 @@ void Bit::BitwiseOr(const string_t &rhs, const string_t &lhs, string_t &result)
306
306
  }
307
307
 
308
308
  char *buf = result.GetDataWriteable();
309
- const char *r_buf = rhs.GetDataUnsafe();
310
- const char *l_buf = lhs.GetDataUnsafe();
309
+ const char *r_buf = rhs.GetData();
310
+ const char *l_buf = lhs.GetData();
311
311
 
312
312
  buf[0] = l_buf[0];
313
313
  for (idx_t i = 1; i < lhs.GetSize(); i++) {
@@ -323,8 +323,8 @@ void Bit::BitwiseXor(const string_t &rhs, const string_t &lhs, string_t &result)
323
323
  }
324
324
 
325
325
  char *buf = result.GetDataWriteable();
326
- const char *r_buf = rhs.GetDataUnsafe();
327
- const char *l_buf = lhs.GetDataUnsafe();
326
+ const char *r_buf = rhs.GetData();
327
+ const char *l_buf = lhs.GetData();
328
328
 
329
329
  buf[0] = l_buf[0];
330
330
  for (idx_t i = 1; i < lhs.GetSize(); i++) {
@@ -335,7 +335,7 @@ void Bit::BitwiseXor(const string_t &rhs, const string_t &lhs, string_t &result)
335
335
 
336
336
  void Bit::BitwiseNot(const string_t &input, string_t &result) {
337
337
  char *result_buf = result.GetDataWriteable();
338
- const char *buf = input.GetDataUnsafe();
338
+ const char *buf = input.GetData();
339
339
 
340
340
  result_buf[0] = buf[0];
341
341
  for (idx_t i = 1; i < input.GetSize(); i++) {
@@ -24,7 +24,7 @@ bool IsRegularCharacter(data_t c) {
24
24
  }
25
25
 
26
26
  idx_t Blob::GetStringSize(string_t blob) {
27
- auto data = (const_data_ptr_t)blob.GetDataUnsafe();
27
+ auto data = (const_data_ptr_t)blob.GetData();
28
28
  auto len = blob.GetSize();
29
29
  idx_t str_len = 0;
30
30
  for (idx_t i = 0; i < len; i++) {
@@ -40,7 +40,7 @@ idx_t Blob::GetStringSize(string_t blob) {
40
40
  }
41
41
 
42
42
  void Blob::ToString(string_t blob, char *output) {
43
- auto data = (const_data_ptr_t)blob.GetDataUnsafe();
43
+ auto data = (const_data_ptr_t)blob.GetData();
44
44
  auto len = blob.GetSize();
45
45
  idx_t str_idx = 0;
46
46
  for (idx_t i = 0; i < len; i++) {
@@ -70,7 +70,7 @@ string Blob::ToString(string_t blob) {
70
70
  }
71
71
 
72
72
  bool Blob::TryGetBlobSize(string_t str, idx_t &str_len, string *error_message) {
73
- auto data = (const_data_ptr_t)str.GetDataUnsafe();
73
+ auto data = (const_data_ptr_t)str.GetData();
74
74
  auto len = str.GetSize();
75
75
  str_len = 0;
76
76
  for (idx_t i = 0; i < len; i++) {
@@ -112,7 +112,7 @@ idx_t Blob::GetBlobSize(string_t str) {
112
112
  }
113
113
 
114
114
  void Blob::ToBlob(string_t str, data_ptr_t output) {
115
- auto data = (const_data_ptr_t)str.GetDataUnsafe();
115
+ auto data = (const_data_ptr_t)str.GetData();
116
116
  auto len = str.GetSize();
117
117
  idx_t blob_idx = 0;
118
118
  for (idx_t i = 0; i < len; i++) {
@@ -149,7 +149,7 @@ idx_t Blob::ToBase64Size(string_t blob) {
149
149
  }
150
150
 
151
151
  void Blob::ToBase64(string_t blob, char *output) {
152
- auto input_data = (const_data_ptr_t)blob.GetDataUnsafe();
152
+ auto input_data = (const_data_ptr_t)blob.GetData();
153
153
  auto input_size = blob.GetSize();
154
154
  idx_t out_idx = 0;
155
155
  idx_t i;
@@ -192,7 +192,7 @@ static constexpr int BASE64_DECODING_TABLE[256] = {
192
192
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
193
193
 
194
194
  idx_t Blob::FromBase64Size(string_t str) {
195
- auto input_data = str.GetDataUnsafe();
195
+ auto input_data = str.GetData();
196
196
  auto input_size = str.GetSize();
197
197
  if (input_size % 4 != 0) {
198
198
  // valid base64 needs to always be cleanly divisible by 4
@@ -239,7 +239,7 @@ uint32_t DecodeBase64Bytes(const string_t &str, const_data_ptr_t input_data, idx
239
239
 
240
240
  void Blob::FromBase64(string_t str, data_ptr_t output, idx_t output_size) {
241
241
  D_ASSERT(output_size == FromBase64Size(str));
242
- auto input_data = (const_data_ptr_t)str.GetDataUnsafe();
242
+ auto input_data = (const_data_ptr_t)str.GetData();
243
243
  auto input_size = str.GetSize();
244
244
  if (input_size == 0) {
245
245
  return;
@@ -197,7 +197,7 @@ void ColumnDataAllocator::UnswizzlePointers(ChunkManagementState &state, Vector
197
197
  D_ASSERT(i < end);
198
198
 
199
199
  auto base_ptr = (char *)GetDataPointer(state, block_id, offset);
200
- if (strings[i].GetDataUnsafe() == base_ptr) {
200
+ if (strings[i].GetData() == base_ptr) {
201
201
  // pointers are still valid
202
202
  return;
203
203
  }
@@ -519,7 +519,7 @@ void ColumnDataCopy<string_t>(ColumnDataMetaData &meta_data, const UnifiedVector
519
519
  target_entry = source_entry;
520
520
  } else {
521
521
  D_ASSERT(heap_ptr != nullptr);
522
- memcpy(heap_ptr, source_entry.GetDataUnsafe(), source_entry.GetSize());
522
+ memcpy(heap_ptr, source_entry.GetData(), source_entry.GetSize());
523
523
  target_entry = string_t((const char *)heap_ptr, source_entry.GetSize());
524
524
  heap_ptr += source_entry.GetSize();
525
525
  }
@@ -64,7 +64,7 @@ hash_t Hash(const char *str) {
64
64
 
65
65
  template <>
66
66
  hash_t Hash(string_t val) {
67
- return Hash(val.GetDataUnsafe(), val.GetSize());
67
+ return Hash(val.GetData(), val.GetSize());
68
68
  }
69
69
 
70
70
  template <>
@@ -169,7 +169,7 @@ inline uint64_t HashOtherSize(const data_ptr_t &data, const idx_t &len) {
169
169
 
170
170
  template <>
171
171
  inline uint64_t TemplatedHash(const string_t &elem) {
172
- data_ptr_t data = (data_ptr_t)elem.GetDataUnsafe();
172
+ data_ptr_t data = (data_ptr_t)elem.GetData();
173
173
  const auto &len = elem.GetSize();
174
174
  uint64_t h = 0;
175
175
  for (idx_t i = 0; i + sizeof(uint64_t) <= len; i += sizeof(uint64_t)) {
@@ -28,7 +28,7 @@ inline void TupleDataValueStore(const string_t &source, const data_ptr_t &row_lo
28
28
  if (source.IsInlined()) {
29
29
  Store<string_t>(source, row_location + offset_in_row);
30
30
  } else {
31
- memcpy(heap_location, source.GetDataUnsafe(), source.GetSize());
31
+ memcpy(heap_location, source.GetData(), source.GetSize());
32
32
  Store<string_t>(string_t((const char *)heap_location, source.GetSize()), row_location + offset_in_row);
33
33
  heap_location += source.GetSize();
34
34
  }
@@ -44,7 +44,7 @@ template <>
44
44
  inline void TupleDataWithinListValueStore(const string_t &source, const data_ptr_t &location,
45
45
  data_ptr_t &heap_location) {
46
46
  Store<uint32_t>(source.GetSize(), location);
47
- memcpy(heap_location, source.GetDataUnsafe(), source.GetSize());
47
+ memcpy(heap_location, source.GetData(), source.GetSize());
48
48
  heap_location += source.GetSize();
49
49
  }
50
50
 
@@ -34,7 +34,7 @@ string_t StringHeap::AddString(const string &data) {
34
34
  }
35
35
 
36
36
  string_t StringHeap::AddString(const string_t &data) {
37
- return AddString(data.GetDataUnsafe(), data.GetSize());
37
+ return AddString(data.GetData(), data.GetSize());
38
38
  }
39
39
 
40
40
  string_t StringHeap::AddBlob(const char *data, idx_t len) {
@@ -46,7 +46,7 @@ string_t StringHeap::AddBlob(const char *data, idx_t len) {
46
46
  }
47
47
 
48
48
  string_t StringHeap::AddBlob(const string_t &data) {
49
- return AddBlob(data.GetDataUnsafe(), data.GetSize());
49
+ return AddBlob(data.GetData(), data.GetSize());
50
50
  }
51
51
 
52
52
  string_t StringHeap::EmptyString(idx_t len) {
@@ -7,7 +7,7 @@
7
7
  namespace duckdb {
8
8
 
9
9
  void string_t::Verify() const {
10
- auto dataptr = GetDataUnsafe();
10
+ auto dataptr = GetData();
11
11
  (void)dataptr;
12
12
  D_ASSERT(dataptr);
13
13
 
@@ -22,7 +22,7 @@ void string_t::Verify() const {
22
22
  }
23
23
  // verify that for strings with length <= INLINE_LENGTH, the rest of the string is zero
24
24
  for (idx_t i = GetSize(); i < INLINE_LENGTH; i++) {
25
- D_ASSERT(GetDataUnsafe()[i] == '\0');
25
+ D_ASSERT(GetData()[i] == '\0');
26
26
  }
27
27
  }
28
28
 
@@ -102,7 +102,7 @@ TimestampCastResult Timestamp::TryConvertTimestamp(const char *str, idx_t len, t
102
102
  }
103
103
  if (tz.GetSize() == 3) {
104
104
  // we can ONLY handle UTC without ICU being loaded
105
- auto tz_ptr = tz.GetDataUnsafe();
105
+ auto tz_ptr = tz.GetData();
106
106
  if ((tz_ptr[0] == 'u' || tz_ptr[0] == 'U') && (tz_ptr[1] == 't' || tz_ptr[1] == 'T') &&
107
107
  (tz_ptr[2] == 'c' || tz_ptr[2] == 'C')) {
108
108
  return TimestampCastResult::SUCCESS;
@@ -456,7 +456,7 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) {
456
456
  auto str_compressed = ((string_t *)data)[index];
457
457
  Value result =
458
458
  FSSTPrimitives::DecompressValue(FSSTVector::GetDecoder(const_cast<Vector &>(*vector)),
459
- (unsigned char *)str_compressed.GetDataUnsafe(), str_compressed.GetSize());
459
+ (unsigned char *)str_compressed.GetData(), str_compressed.GetSize());
460
460
  return result;
461
461
  }
462
462
 
@@ -543,11 +543,11 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) {
543
543
  case LogicalTypeId::AGGREGATE_STATE:
544
544
  case LogicalTypeId::BLOB: {
545
545
  auto str = ((string_t *)data)[index];
546
- return Value::BLOB((const_data_ptr_t)str.GetDataUnsafe(), str.GetSize());
546
+ return Value::BLOB((const_data_ptr_t)str.GetData(), str.GetSize());
547
547
  }
548
548
  case LogicalTypeId::BIT: {
549
549
  auto str = ((string_t *)data)[index];
550
- return Value::BIT((const_data_ptr_t)str.GetDataUnsafe(), str.GetSize());
550
+ return Value::BIT((const_data_ptr_t)str.GetData(), str.GetSize());
551
551
  }
552
552
  case LogicalTypeId::MAP: {
553
553
  auto offlen = ((list_entry_t *)data)[index];
@@ -637,7 +637,7 @@ string Vector::ToString(idx_t count) const {
637
637
  for (idx_t i = 0; i < count; i++) {
638
638
  string_t compressed_string = ((string_t *)data)[i];
639
639
  Value val = FSSTPrimitives::DecompressValue(FSSTVector::GetDecoder(const_cast<Vector &>(*this)),
640
- (unsigned char *)compressed_string.GetDataUnsafe(),
640
+ (unsigned char *)compressed_string.GetData(),
641
641
  compressed_string.GetSize());
642
642
  retval += GetValue(i).ToString() + (i == count - 1 ? "" : ", ");
643
643
  }
@@ -924,7 +924,7 @@ void Vector::Serialize(idx_t count, Serializer &serializer) {
924
924
  for (idx_t i = 0; i < count; i++) {
925
925
  auto idx = vdata.sel->get_index(i);
926
926
  auto source = !vdata.validity.RowIsValid(idx) ? NullValue<string_t>() : strings[idx];
927
- serializer.WriteStringLen((const_data_ptr_t)source.GetDataUnsafe(), source.GetSize());
927
+ serializer.WriteStringLen((const_data_ptr_t)source.GetData(), source.GetSize());
928
928
  }
929
929
  break;
930
930
  }
@@ -1321,7 +1321,7 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
1321
1321
  for (idx_t i = 0; i < count; i++) {
1322
1322
  auto oidx = sel->get_index(i);
1323
1323
  if (validity.RowIsValid(oidx)) {
1324
- auto buf = strings[oidx].GetDataUnsafe();
1324
+ auto buf = strings[oidx].GetData();
1325
1325
  D_ASSERT(*buf >= 0 && *buf < 8);
1326
1326
  Bit::Verify(strings[oidx]);
1327
1327
  }
@@ -1706,7 +1706,7 @@ void FSSTVector::DecompressVector(const Vector &src, Vector &dst, idx_t src_offs
1706
1706
  string_t compressed_string = ldata[source_idx];
1707
1707
  if (dst_mask.RowIsValid(target_idx) && compressed_string.GetSize() > 0) {
1708
1708
  tdata[target_idx] = FSSTPrimitives::DecompressValue(FSSTVector::GetDecoder(src), dst,
1709
- (unsigned char *)compressed_string.GetDataUnsafe(),
1709
+ (unsigned char *)compressed_string.GetData(),
1710
1710
  compressed_string.GetSize());
1711
1711
  } else {
1712
1712
  tdata[target_idx] = string_t(nullptr, 0);
@@ -18,7 +18,7 @@ template <>
18
18
  Key Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, string_t value) {
19
19
  idx_t len = value.GetSize() + 1;
20
20
  auto data = allocator.Allocate(len);
21
- memcpy(data, value.GetDataUnsafe(), len - 1);
21
+ memcpy(data, value.GetData(), len - 1);
22
22
 
23
23
  // FIXME: rethink this
24
24
  if (type == LogicalType::BLOB || type == LogicalType::VARCHAR) {
@@ -43,7 +43,7 @@ template <>
43
43
  void Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, string_t value) {
44
44
  key.len = value.GetSize() + 1;
45
45
  key.data = allocator.Allocate(key.len);
46
- memcpy(key.data, value.GetDataUnsafe(), key.len - 1);
46
+ memcpy(key.data, value.GetData(), key.len - 1);
47
47
 
48
48
  // FIXME: rethink this
49
49
  if (type == LogicalType::BLOB || type == LogicalType::VARCHAR) {