duckdb 0.7.2-dev2320.0 → 0.7.2-dev2410.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. package/package.json +1 -1
  2. package/src/data_chunk.cpp +1 -1
  3. package/src/duckdb/extension/icu/icu-extension.cpp +2 -2
  4. package/src/duckdb/extension/icu/icu-makedate.cpp +52 -0
  5. package/src/duckdb/extension/icu/icu-strptime.cpp +1 -1
  6. package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +4 -0
  7. package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.cpp +28 -28
  8. package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.h +4 -4
  9. package/src/duckdb/extension/json/include/json_common.hpp +1 -1
  10. package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
  11. package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
  12. package/src/duckdb/extension/json/json_functions.cpp +2 -2
  13. package/src/duckdb/extension/json/json_serializer.cpp +1 -1
  14. package/src/duckdb/extension/parquet/column_reader.cpp +1 -1
  15. package/src/duckdb/extension/parquet/column_writer.cpp +3 -3
  16. package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +2 -2
  17. package/src/duckdb/src/common/arrow/arrow_appender.cpp +2 -2
  18. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  19. package/src/duckdb/src/common/file_buffer.cpp +8 -0
  20. package/src/duckdb/src/common/operator/cast_operators.cpp +24 -25
  21. package/src/duckdb/src/common/radix_partitioning.cpp +34 -0
  22. package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +2 -2
  23. package/src/duckdb/src/common/row_operations/row_scatter.cpp +1 -1
  24. package/src/duckdb/src/common/sort/partition_state.cpp +44 -124
  25. package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
  26. package/src/duckdb/src/common/types/bit.cpp +18 -18
  27. package/src/duckdb/src/common/types/blob.cpp +7 -7
  28. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +1 -1
  29. package/src/duckdb/src/common/types/column/column_data_collection.cpp +1 -1
  30. package/src/duckdb/src/common/types/hash.cpp +1 -1
  31. package/src/duckdb/src/common/types/hyperloglog.cpp +1 -1
  32. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
  33. package/src/duckdb/src/common/types/string_heap.cpp +2 -2
  34. package/src/duckdb/src/common/types/string_type.cpp +2 -2
  35. package/src/duckdb/src/common/types/timestamp.cpp +1 -1
  36. package/src/duckdb/src/common/types/vector.cpp +7 -7
  37. package/src/duckdb/src/execution/index/art/art_key.cpp +2 -2
  38. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +144 -31
  39. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +698 -0
  40. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +1 -1
  41. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
  42. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -1
  43. package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -2
  44. package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +2 -2
  45. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +2 -2
  46. package/src/duckdb/src/function/aggregate/distributive/first.cpp +2 -2
  47. package/src/duckdb/src/function/aggregate/distributive/kurtosis.cpp +3 -2
  48. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -2
  49. package/src/duckdb/src/function/aggregate/distributive/skew.cpp +5 -1
  50. package/src/duckdb/src/function/aggregate/distributive/string_agg.cpp +1 -1
  51. package/src/duckdb/src/function/cast/list_casts.cpp +1 -1
  52. package/src/duckdb/src/function/cast/struct_cast.cpp +1 -1
  53. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -3
  54. package/src/duckdb/src/function/scalar/bit/bitstring.cpp +1 -1
  55. package/src/duckdb/src/function/scalar/blob/encode.cpp +1 -1
  56. package/src/duckdb/src/function/scalar/date/strftime.cpp +3 -3
  57. package/src/duckdb/src/function/scalar/generic/current_setting.cpp +1 -1
  58. package/src/duckdb/src/function/scalar/list/list_sort.cpp +30 -56
  59. package/src/duckdb/src/function/scalar/string/ascii.cpp +1 -1
  60. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -2
  61. package/src/duckdb/src/function/scalar/string/concat.cpp +6 -6
  62. package/src/duckdb/src/function/scalar/string/contains.cpp +2 -2
  63. package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +2 -2
  64. package/src/duckdb/src/function/scalar/string/hex.cpp +4 -4
  65. package/src/duckdb/src/function/scalar/string/instr.cpp +1 -1
  66. package/src/duckdb/src/function/scalar/string/jaccard.cpp +1 -1
  67. package/src/duckdb/src/function/scalar/string/jaro_winkler.cpp +5 -5
  68. package/src/duckdb/src/function/scalar/string/length.cpp +1 -1
  69. package/src/duckdb/src/function/scalar/string/levenshtein.cpp +2 -2
  70. package/src/duckdb/src/function/scalar/string/like.cpp +10 -11
  71. package/src/duckdb/src/function/scalar/string/mismatches.cpp +2 -2
  72. package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +1 -1
  73. package/src/duckdb/src/function/scalar/string/pad.cpp +3 -3
  74. package/src/duckdb/src/function/scalar/string/prefix.cpp +2 -2
  75. package/src/duckdb/src/function/scalar/string/printf.cpp +1 -1
  76. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +4 -4
  77. package/src/duckdb/src/function/scalar/string/repeat.cpp +1 -1
  78. package/src/duckdb/src/function/scalar/string/replace.cpp +3 -3
  79. package/src/duckdb/src/function/scalar/string/reverse.cpp +1 -1
  80. package/src/duckdb/src/function/scalar/string/starts_with.cpp +2 -2
  81. package/src/duckdb/src/function/scalar/string/string_split.cpp +3 -3
  82. package/src/duckdb/src/function/scalar/string/strip_accents.cpp +2 -2
  83. package/src/duckdb/src/function/scalar/string/substring.cpp +3 -3
  84. package/src/duckdb/src/function/scalar/string/suffix.cpp +2 -2
  85. package/src/duckdb/src/function/scalar/string/translate.cpp +3 -3
  86. package/src/duckdb/src/function/scalar/string/trim.cpp +3 -3
  87. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +1 -1
  88. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -7
  89. package/src/duckdb/src/function/scalar/union/union_extract.cpp +1 -1
  90. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  91. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +2 -2
  92. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  93. package/src/duckdb/src/include/duckdb/common/crypto/md5.hpp +1 -1
  94. package/src/duckdb/src/include/duckdb/common/enums/debug_initialize.hpp +17 -0
  95. package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +8 -0
  96. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  97. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +3 -0
  98. package/src/duckdb/src/include/duckdb/common/radix.hpp +1 -1
  99. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +3 -0
  100. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -60
  101. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +8 -6
  102. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +1 -1
  103. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +6 -1
  104. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +93 -0
  105. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +1 -1
  106. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +17 -0
  107. package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +1 -1
  108. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -2
  109. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
  110. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
  111. package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +1 -1
  112. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  113. package/src/duckdb/src/include/duckdb/main/config.hpp +7 -2
  114. package/src/duckdb/src/include/duckdb/main/settings.hpp +13 -3
  115. package/src/duckdb/src/include/duckdb/optimizer/cse_optimizer.hpp +1 -1
  116. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -2
  117. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
  118. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
  119. package/src/duckdb/src/include/duckdb/planner/expression_binder/aggregate_binder.hpp +1 -1
  120. package/src/duckdb/src/include/duckdb/planner/expression_binder/alter_binder.hpp +1 -1
  121. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +4 -3
  122. package/src/duckdb/src/include/duckdb/planner/expression_binder/check_binder.hpp +1 -1
  123. package/src/duckdb/src/include/duckdb/planner/expression_binder/constant_binder.hpp +1 -1
  124. package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -1
  125. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -2
  126. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +1 -1
  127. package/src/duckdb/src/include/duckdb/planner/expression_binder/insert_binder.hpp +1 -1
  128. package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +2 -2
  129. package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +2 -2
  130. package/src/duckdb/src/include/duckdb/planner/expression_binder/relation_binder.hpp +1 -1
  131. package/src/duckdb/src/include/duckdb/planner/expression_binder/returning_binder.hpp +1 -1
  132. package/src/duckdb/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +1 -1
  133. package/src/duckdb/src/include/duckdb/planner/expression_binder/update_binder.hpp +1 -1
  134. package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +2 -2
  135. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +12 -9
  136. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +1 -0
  137. package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +3 -0
  138. package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -1
  139. package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +11 -5
  140. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
  141. package/src/duckdb/src/main/capi/cast/from_decimal-c.cpp +1 -1
  142. package/src/duckdb/src/main/capi/result-c.cpp +2 -2
  143. package/src/duckdb/src/main/config.cpp +26 -0
  144. package/src/duckdb/src/main/settings/settings.cpp +31 -8
  145. package/src/duckdb/src/optimizer/cse_optimizer.cpp +9 -8
  146. package/src/duckdb/src/parser/expression/subquery_expression.cpp +1 -1
  147. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
  148. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +33 -29
  149. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +8 -10
  150. package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
  151. package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +2 -2
  152. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +1 -1
  153. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +8 -7
  154. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +2 -2
  155. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +6 -6
  156. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +2 -2
  157. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
  158. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +6 -14
  159. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +2 -5
  160. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +1 -1
  161. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +8 -8
  162. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +5 -5
  163. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -2
  164. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  165. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +2 -2
  166. package/src/duckdb/src/planner/binder/tableref/plan_expressionlistref.cpp +1 -1
  167. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +4 -4
  168. package/src/duckdb/src/planner/expression.cpp +2 -1
  169. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +2 -2
  170. package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
  171. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
  172. package/src/duckdb/src/planner/expression_binder/check_binder.cpp +4 -4
  173. package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +1 -1
  174. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
  175. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
  176. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +4 -4
  177. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
  178. package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
  179. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
  180. package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +4 -4
  181. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +2 -2
  182. package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +2 -2
  183. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +3 -3
  184. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +2 -2
  185. package/src/duckdb/src/planner/expression_binder/where_binder.cpp +4 -4
  186. package/src/duckdb/src/planner/expression_binder.cpp +12 -12
  187. package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -2
  188. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +2 -2
  189. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
  190. package/src/duckdb/src/storage/compression/fsst.cpp +3 -3
  191. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +1 -1
  192. package/src/duckdb/src/storage/meta_block_writer.cpp +4 -0
  193. package/src/duckdb/src/storage/partial_block_manager.cpp +11 -4
  194. package/src/duckdb/src/storage/single_file_block_manager.cpp +16 -9
  195. package/src/duckdb/src/storage/standard_buffer_manager.cpp +5 -2
  196. package/src/duckdb/src/storage/statistics/string_stats.cpp +2 -2
  197. package/src/duckdb/src/storage/storage_manager.cpp +7 -2
  198. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +21 -1
  199. package/src/duckdb/ub_src_execution_operator_join.cpp +2 -0
  200. package/src/statement.cpp +3 -3
@@ -208,7 +208,7 @@ public:
208
208
  // Copy string to dict
209
209
  current_dictionary.size += str.GetSize();
210
210
  auto dict_pos = current_end_ptr - current_dictionary.size;
211
- memcpy(dict_pos, str.GetDataUnsafe(), str.GetSize());
211
+ memcpy(dict_pos, str.GetData(), str.GetSize());
212
212
  current_dictionary.Verify();
213
213
  D_ASSERT(current_dictionary.end == Storage::BLOCK_SIZE);
214
214
 
@@ -155,7 +155,7 @@ idx_t FSSTStorage::StringFinalAnalyze(AnalyzeState &state_p) {
155
155
  vector<unsigned char *> fsst_string_ptrs;
156
156
  for (auto &str : state.fsst_strings) {
157
157
  fsst_string_sizes.push_back(str.GetSize());
158
- fsst_string_ptrs.push_back((unsigned char *)str.GetDataUnsafe());
158
+ fsst_string_ptrs.push_back((unsigned char *)str.GetData());
159
159
  }
160
160
 
161
161
  state.fsst_encoder = duckdb_fsst_create(string_count, &fsst_string_sizes[0], &fsst_string_ptrs[0], 0);
@@ -431,7 +431,7 @@ void FSSTStorage::Compress(CompressionState &state_p, Vector &scan_vector, idx_t
431
431
  total_count++;
432
432
  total_size += data[idx].GetSize();
433
433
  sizes_in.push_back(data[idx].GetSize());
434
- strings_in.push_back((unsigned char *)data[idx].GetDataUnsafe());
434
+ strings_in.push_back((unsigned char *)data[idx].GetData());
435
435
  }
436
436
 
437
437
  // Only Nulls or empty strings in this vector, nothing to compress
@@ -669,7 +669,7 @@ void FSSTStorage::StringFetchRow(ColumnSegment &segment, ColumnFetchState &state
669
669
  segment, dict, result, base_ptr, delta_decode_buffer[offsets.unused_delta_decoded_values], string_length);
670
670
 
671
671
  result_data[result_idx] = FSSTPrimitives::DecompressValue(
672
- (void *)&decoder, result, (unsigned char *)compressed_string.GetDataUnsafe(), compressed_string.GetSize());
672
+ (void *)&decoder, result, (unsigned char *)compressed_string.GetData(), compressed_string.GetSize());
673
673
  } else {
674
674
  // There's no fsst symtable, this only happens for empty strings or nulls, we can just emit an empty string
675
675
  result_data[result_idx] = string_t(nullptr, 0);
@@ -267,7 +267,7 @@ void UncompressedStringStorage::WriteStringMemory(ColumnSegment &segment, string
267
267
  auto ptr = handle.Ptr() + state.head->offset;
268
268
  Store<uint32_t>(string.GetSize(), ptr);
269
269
  ptr += sizeof(uint32_t);
270
- memcpy(ptr, string.GetDataUnsafe(), string.GetSize());
270
+ memcpy(ptr, string.GetData(), string.GetSize());
271
271
  state.head->offset += total_length;
272
272
  }
273
273
 
@@ -35,6 +35,10 @@ BlockPointer MetaBlockWriter::GetBlockPointer() {
35
35
  }
36
36
 
37
37
  void MetaBlockWriter::Flush() {
38
+ if (offset < block->size) {
39
+ // clear remaining bytes of block (if any)
40
+ memset(block->buffer + offset, 0, block->size - offset);
41
+ }
38
42
  AdvanceBlock();
39
43
  block = nullptr;
40
44
  }
@@ -54,9 +54,14 @@ bool PartialBlockManager::GetPartialBlock(idx_t segment_size, unique_ptr<Partial
54
54
  }
55
55
 
56
56
  void PartialBlockManager::RegisterPartialBlock(PartialBlockAllocation &&allocation) {
57
- auto &state(allocation.partial_block->state);
57
+ auto &state = allocation.partial_block->state;
58
58
  if (state.block_use_count < max_use_count) {
59
- auto new_size = AlignValue(allocation.allocation_size + state.offset_in_block);
59
+ auto unaligned_size = allocation.allocation_size + state.offset_in_block;
60
+ auto new_size = AlignValue(unaligned_size);
61
+ if (new_size != unaligned_size) {
62
+ // register the uninitialized region so we can correctly initialize it before writing to disk
63
+ allocation.partial_block->AddUninitializedRegion(unaligned_size, new_size);
64
+ }
60
65
  state.offset_in_block = new_size;
61
66
  auto new_space_left = state.block_size - new_size;
62
67
  // check if the block is STILL partially filled after adding the segment_size
@@ -65,22 +70,24 @@ void PartialBlockManager::RegisterPartialBlock(PartialBlockAllocation &&allocati
65
70
  partially_filled_blocks.insert(make_pair(new_space_left, std::move(allocation.partial_block)));
66
71
  }
67
72
  }
73
+ idx_t free_space = state.block_size - state.offset_in_block;
68
74
  auto block_to_free = std::move(allocation.partial_block);
69
75
  if (!block_to_free && partially_filled_blocks.size() > MAX_BLOCK_MAP_SIZE) {
70
76
  // Free the page with the least space free.
71
77
  auto itr = partially_filled_blocks.begin();
72
78
  block_to_free = std::move(itr->second);
79
+ free_space = state.block_size - itr->first;
73
80
  partially_filled_blocks.erase(itr);
74
81
  }
75
82
  // Flush any block that we're not going to reuse.
76
83
  if (block_to_free) {
77
- block_to_free->Flush();
84
+ block_to_free->Flush(free_space);
78
85
  }
79
86
  }
80
87
 
81
88
  void PartialBlockManager::FlushPartialBlocks() {
82
89
  for (auto &e : partially_filled_blocks) {
83
- e.second->Flush();
90
+ e.second->Flush(e.first);
84
91
  }
85
92
  partially_filled_blocks.clear();
86
93
  }
@@ -107,15 +107,15 @@ T DeserializeHeaderStructure(data_ptr_t ptr) {
107
107
  return T::Deserialize(source);
108
108
  }
109
109
 
110
- SingleFileBlockManager::SingleFileBlockManager(AttachedDatabase &db, string path_p, bool read_only, bool use_direct_io)
110
+ SingleFileBlockManager::SingleFileBlockManager(AttachedDatabase &db, string path_p, StorageManagerOptions options)
111
111
  : BlockManager(BufferManager::GetBufferManager(db)), db(db), path(std::move(path_p)),
112
112
  header_buffer(Allocator::Get(db), FileBufferType::MANAGED_BUFFER,
113
113
  Storage::FILE_HEADER_SIZE - Storage::BLOCK_HEADER_SIZE),
114
- iteration_count(0), read_only(read_only), use_direct_io(use_direct_io) {
114
+ iteration_count(0), options(options) {
115
115
  }
116
116
 
117
117
  void SingleFileBlockManager::GetFileFlags(uint8_t &flags, FileLockType &lock, bool create_new) {
118
- if (read_only) {
118
+ if (options.read_only) {
119
119
  D_ASSERT(!create_new);
120
120
  flags = FileFlags::FILE_FLAGS_READ;
121
121
  lock = FileLockType::READ_LOCK;
@@ -126,7 +126,7 @@ void SingleFileBlockManager::GetFileFlags(uint8_t &flags, FileLockType &lock, bo
126
126
  flags |= FileFlags::FILE_FLAGS_FILE_CREATE;
127
127
  }
128
128
  }
129
- if (use_direct_io) {
129
+ if (options.use_direct_io) {
130
130
  flags |= FileFlags::FILE_FLAGS_DIRECT_IO;
131
131
  }
132
132
  }
@@ -241,7 +241,7 @@ void SingleFileBlockManager::Initialize(DatabaseHeader &header) {
241
241
  }
242
242
 
243
243
  void SingleFileBlockManager::LoadFreeList() {
244
- if (read_only) {
244
+ if (options.read_only) {
245
245
  // no need to load free list for read only db
246
246
  return;
247
247
  }
@@ -343,13 +343,20 @@ idx_t SingleFileBlockManager::FreeBlocks() {
343
343
  return free_list.size();
344
344
  }
345
345
 
346
+ unique_ptr<Block> SingleFileBlockManager::ConvertBlock(block_id_t block_id, FileBuffer &source_buffer) {
347
+ D_ASSERT(source_buffer.AllocSize() == Storage::BLOCK_ALLOC_SIZE);
348
+ return make_uniq<Block>(source_buffer, block_id);
349
+ }
350
+
346
351
  unique_ptr<Block> SingleFileBlockManager::CreateBlock(block_id_t block_id, FileBuffer *source_buffer) {
352
+ unique_ptr<Block> result;
347
353
  if (source_buffer) {
348
- D_ASSERT(source_buffer->AllocSize() == Storage::BLOCK_ALLOC_SIZE);
349
- return make_uniq<Block>(*source_buffer, block_id);
354
+ result = ConvertBlock(block_id, *source_buffer);
350
355
  } else {
351
- return make_uniq<Block>(Allocator::Get(db), block_id);
356
+ result = make_uniq<Block>(Allocator::Get(db), block_id);
352
357
  }
358
+ result->Initialize(options.debug_initialize);
359
+ return result;
353
360
  }
354
361
 
355
362
  void SingleFileBlockManager::Read(Block &block) {
@@ -459,7 +466,7 @@ void SingleFileBlockManager::WriteHeader(DatabaseHeader header) {
459
466
  throw FatalException("Checkpoint aborted after free list write because of PRAGMA checkpoint_abort flag");
460
467
  }
461
468
 
462
- if (!use_direct_io) {
469
+ if (!options.use_direct_io) {
463
470
  // if we are not using Direct IO we need to fsync BEFORE we write the header to ensure that all the previous
464
471
  // blocks are written as well
465
472
  handle->Sync();
@@ -20,14 +20,17 @@ struct BufferAllocatorData : PrivateAllocatorData {
20
20
 
21
21
  unique_ptr<FileBuffer> StandardBufferManager::ConstructManagedBuffer(idx_t size, unique_ptr<FileBuffer> &&source,
22
22
  FileBufferType type) {
23
+ unique_ptr<FileBuffer> result;
23
24
  if (source) {
24
25
  auto tmp = std::move(source);
25
26
  D_ASSERT(tmp->AllocSize() == BufferManager::GetAllocSize(size));
26
- return make_uniq<FileBuffer>(*tmp, type);
27
+ result = make_uniq<FileBuffer>(*tmp, type);
27
28
  } else {
28
29
  // no re-usable buffer: allocate a new buffer
29
- return make_uniq<FileBuffer>(Allocator::Get(db), type, size);
30
+ result = make_uniq<FileBuffer>(Allocator::Get(db), type, size);
30
31
  }
32
+ result->Initialize(DBConfig::GetConfig(db).options.debug_initialize);
33
+ return result;
31
34
  }
32
35
 
33
36
  class TemporaryFileManager;
@@ -116,7 +116,7 @@ static void ConstructValue(const_data_ptr_t data, idx_t size, data_t target[]) {
116
116
  }
117
117
 
118
118
  void StringStats::Update(BaseStatistics &stats, const string_t &value) {
119
- auto data = (const_data_ptr_t)value.GetDataUnsafe();
119
+ auto data = (const_data_ptr_t)value.GetData();
120
120
  auto size = value.GetSize();
121
121
 
122
122
  //! we can only fit 8 bytes, so we might need to trim our string
@@ -238,7 +238,7 @@ void StringStats::Verify(const BaseStatistics &stats, Vector &vector, const Sele
238
238
  continue;
239
239
  }
240
240
  auto value = data[index];
241
- auto data = value.GetDataUnsafe();
241
+ auto data = value.GetData();
242
242
  auto len = value.GetSize();
243
243
  // LCOV_EXCL_START
244
244
  if (string_data.has_max_string_length && len > string_data.max_string_length) {
@@ -95,6 +95,11 @@ void SingleFileStorageManager::LoadDatabase() {
95
95
  auto &fs = FileSystem::Get(db);
96
96
  auto &config = DBConfig::Get(db);
97
97
  bool truncate_wal = false;
98
+
99
+ StorageManagerOptions options;
100
+ options.read_only = read_only;
101
+ options.use_direct_io = config.options.use_direct_io;
102
+ options.debug_initialize = config.options.debug_initialize;
98
103
  // first check if the database exists
99
104
  if (!fs.FileExists(path)) {
100
105
  if (read_only) {
@@ -107,13 +112,13 @@ void SingleFileStorageManager::LoadDatabase() {
107
112
  fs.RemoveFile(wal_path);
108
113
  }
109
114
  // initialize the block manager while creating a new db file
110
- auto sf_block_manager = make_uniq<SingleFileBlockManager>(db, path, read_only, config.options.use_direct_io);
115
+ auto sf_block_manager = make_uniq<SingleFileBlockManager>(db, path, options);
111
116
  sf_block_manager->CreateNewDatabase();
112
117
  block_manager = std::move(sf_block_manager);
113
118
  table_io_manager = make_uniq<SingleFileTableIOManager>(*block_manager);
114
119
  } else {
115
120
  // initialize the block manager while loading the current db file
116
- auto sf_block_manager = make_uniq<SingleFileBlockManager>(db, path, read_only, config.options.use_direct_io);
121
+ auto sf_block_manager = make_uniq<SingleFileBlockManager>(db, path, options);
117
122
  sf_block_manager->LoadExistingDatabase();
118
123
  block_manager = std::move(sf_block_manager);
119
124
  table_io_manager = make_uniq<SingleFileTableIOManager>(*block_manager);
@@ -50,16 +50,36 @@ public:
50
50
  BlockManager &block_manager;
51
51
  vector<PartialColumnSegment> tail_segments;
52
52
 
53
+ private:
54
+ struct UninitializedRegion {
55
+ idx_t start;
56
+ idx_t end;
57
+ };
58
+ vector<UninitializedRegion> uninitialized_regions;
59
+
53
60
  public:
54
61
  bool IsFlushed() {
55
62
  // first_segment is zeroed on Flush
56
63
  return !first_segment;
57
64
  }
58
65
 
59
- void Flush() override {
66
+ void AddUninitializedRegion(idx_t start, idx_t end) override {
67
+ uninitialized_regions.push_back({start, end});
68
+ }
69
+
70
+ void Flush(idx_t free_space_left) override {
60
71
  // At this point, we've already copied all data from tail_segments
61
72
  // into the page owned by first_segment. We flush all segment data to
62
73
  // disk with the following call.
74
+ if (free_space_left > 0 || !uninitialized_regions.empty()) {
75
+ auto handle = block_manager.buffer_manager.Pin(first_segment->block);
76
+ // memset any uninitialized regions
77
+ for (auto &uninitialized : uninitialized_regions) {
78
+ memset(handle.Ptr() + uninitialized.start, 0, uninitialized.end - uninitialized.start);
79
+ }
80
+ // memset any free space at the end of the block to 0 prior to writing to disk
81
+ memset(handle.Ptr() + Storage::BLOCK_SIZE - free_space_left, 0, free_space_left);
82
+ }
63
83
  first_data->IncrementVersion();
64
84
  first_segment->ConvertToPersistent(&block_manager, state.block_id);
65
85
  // Now that the page is persistent, update tail_segments to point to the
@@ -1,5 +1,7 @@
1
1
  #include "src/execution/operator/join/outer_join_marker.cpp"
2
2
 
3
+ #include "src/execution/operator/join/physical_asof_join.cpp"
4
+
3
5
  #include "src/execution/operator/join/physical_blockwise_nl_join.cpp"
4
6
 
5
7
  #include "src/execution/operator/join/physical_comparison_join.cpp"
package/src/statement.cpp CHANGED
@@ -377,8 +377,8 @@ struct RunPreparedTask : public Task {
377
377
  // query results, the string data is owned by the QueryResult
378
378
  auto result_ref_ptr = new std::shared_ptr<duckdb::QueryResult>(result_ptr);
379
379
 
380
- auto array_buffer = Napi::ArrayBuffer::New(env, (void *)blob.GetDataUnsafe(), blob.GetSize(),
381
- deleter, result_ref_ptr);
380
+ auto array_buffer =
381
+ Napi::ArrayBuffer::New(env, (void *)blob.GetData(), blob.GetSize(), deleter, result_ref_ptr);
382
382
 
383
383
  auto typed_array = Napi::Uint8Array::New(env, blob.GetSize(), array_buffer, 0);
384
384
 
@@ -615,7 +615,7 @@ struct GetNextArrowIpcTask : public Task {
615
615
  delete static_cast<unique_ptr<duckdb::DataChunk> *>(hint);
616
616
  };
617
617
  auto array_buffer =
618
- Napi::ArrayBuffer::New(env, (void *)blob.GetDataUnsafe(), blob.GetSize(), deleter, data_chunk_ptr);
618
+ Napi::ArrayBuffer::New(env, (void *)blob.GetData(), blob.GetSize(), deleter, data_chunk_ptr);
619
619
 
620
620
  deferred.Resolve(array_buffer);
621
621
  }