duckdb 1.1.0 → 1.1.1-dev3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. package/binding.gyp +2 -1
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  4. package/src/duckdb/extension/json/include/json_common.hpp +14 -4
  5. package/src/duckdb/extension/json/include/json_executors.hpp +11 -3
  6. package/src/duckdb/extension/json/json_extension.cpp +1 -1
  7. package/src/duckdb/extension/json/json_functions/json_extract.cpp +11 -3
  8. package/src/duckdb/extension/json/json_functions/json_value.cpp +4 -3
  9. package/src/duckdb/extension/json/json_functions.cpp +16 -7
  10. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  11. package/src/duckdb/extension/parquet/column_writer.cpp +54 -43
  12. package/src/duckdb/extension/parquet/geo_parquet.cpp +19 -0
  13. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +10 -6
  14. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +3 -3
  15. package/src/duckdb/extension/parquet/parquet_writer.cpp +2 -1
  16. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  17. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +1 -0
  18. package/src/duckdb/src/common/arrow/arrow_util.cpp +60 -0
  19. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +1 -53
  20. package/src/duckdb/src/common/cgroups.cpp +15 -24
  21. package/src/duckdb/src/common/constants.cpp +8 -0
  22. package/src/duckdb/src/common/enum_util.cpp +331 -326
  23. package/src/duckdb/src/common/http_util.cpp +5 -1
  24. package/src/duckdb/src/common/operator/cast_operators.cpp +6 -60
  25. package/src/duckdb/src/common/types/bit.cpp +1 -1
  26. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -1
  27. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +2 -1
  28. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +5 -0
  29. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +1 -1
  30. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -1
  31. package/src/duckdb/src/execution/index/art/iterator.cpp +17 -15
  32. package/src/duckdb/src/execution/index/art/prefix.cpp +9 -34
  33. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +4 -3
  34. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +1 -0
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -1
  36. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +2 -2
  37. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +23 -1
  38. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +33 -4
  39. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +23 -13
  40. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +23 -19
  41. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +12 -11
  42. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -14
  43. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +4 -4
  44. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +3 -1
  45. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -1
  46. package/src/duckdb/src/function/cast/decimal_cast.cpp +33 -3
  47. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +9 -0
  48. package/src/duckdb/src/function/table/arrow.cpp +34 -22
  49. package/src/duckdb/src/function/table/sniff_csv.cpp +4 -1
  50. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  51. package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +31 -0
  52. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +2 -16
  53. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +60 -0
  54. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -0
  55. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +0 -1
  56. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +2 -1
  57. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +9 -5
  58. package/src/duckdb/src/include/duckdb/execution/executor.hpp +1 -0
  59. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +5 -2
  60. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +5 -1
  61. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +5 -5
  62. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -0
  63. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -0
  64. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
  66. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +14 -5
  67. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/main/settings.hpp +4 -2
  69. package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +3 -0
  70. package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +7 -1
  72. package/src/duckdb/src/include/duckdb/planner/binder.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -1
  75. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -1
  76. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +7 -4
  77. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +2 -2
  78. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +2 -1
  79. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +4 -4
  80. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -4
  81. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +4 -2
  83. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +1 -0
  85. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -0
  86. package/src/duckdb/src/include/duckdb/transaction/transaction_manager.hpp +1 -1
  87. package/src/duckdb/src/include/duckdb.h +8 -8
  88. package/src/duckdb/src/main/appender.cpp +1 -1
  89. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +3 -3
  90. package/src/duckdb/src/main/capi/helper-c.cpp +4 -0
  91. package/src/duckdb/src/main/config.cpp +24 -11
  92. package/src/duckdb/src/main/database.cpp +6 -5
  93. package/src/duckdb/src/main/extension/extension_install.cpp +13 -8
  94. package/src/duckdb/src/main/extension/extension_load.cpp +10 -4
  95. package/src/duckdb/src/main/extension.cpp +1 -1
  96. package/src/duckdb/src/optimizer/filter_pushdown.cpp +10 -1
  97. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +9 -5
  98. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +14 -8
  99. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -0
  100. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -0
  101. package/src/duckdb/src/optimizer/optimizer.cpp +4 -1
  102. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -11
  103. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +1 -7
  104. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
  105. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +3 -0
  106. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
  107. package/src/duckdb/src/parser/keyword_helper.cpp +4 -0
  108. package/src/duckdb/src/parser/parser.cpp +20 -18
  109. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +8 -3
  110. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -0
  111. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +7 -1
  112. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +13 -0
  113. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -11
  114. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +27 -10
  115. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +24 -9
  116. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -3
  117. package/src/duckdb/src/planner/binder.cpp +5 -6
  118. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +1 -0
  119. package/src/duckdb/src/planner/expression_binder/select_binder.cpp +9 -0
  120. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +2 -2
  121. package/src/duckdb/src/planner/operator/logical_positional_join.cpp +1 -0
  122. package/src/duckdb/src/storage/buffer/block_handle.cpp +18 -21
  123. package/src/duckdb/src/storage/buffer/block_manager.cpp +12 -4
  124. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
  125. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +12 -2
  126. package/src/duckdb/src/storage/buffer_manager.cpp +3 -2
  127. package/src/duckdb/src/storage/compression/rle.cpp +5 -2
  128. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -1
  129. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +8 -7
  130. package/src/duckdb/src/storage/standard_buffer_manager.cpp +19 -20
  131. package/src/duckdb/src/storage/statistics/column_statistics.cpp +1 -2
  132. package/src/duckdb/src/storage/table/column_data.cpp +5 -2
  133. package/src/duckdb/src/storage/table/column_segment.cpp +2 -2
  134. package/src/duckdb/src/storage/table/row_group_collection.cpp +18 -14
  135. package/src/duckdb/src/storage/table/standard_column_data.cpp +3 -3
  136. package/src/duckdb/src/storage/wal_replay.cpp +2 -3
  137. package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -0
  138. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
  139. package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -2
  140. package/src/duckdb/third_party/libpg_query/include/pg_simplified_token.hpp +6 -4
  141. package/src/duckdb/third_party/libpg_query/include/postgres_parser.hpp +1 -1
  142. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +1 -1
  143. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +801 -799
  144. package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +6 -2
  145. package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +0 -1
  146. package/src/duckdb/ub_src_common_arrow.cpp +2 -0
  147. package/vendor.py +1 -2
@@ -126,9 +126,8 @@ shared_ptr<BlockHandle> StandardBufferManager::RegisterTransientMemory(const idx
126
126
  return RegisterSmallMemory(size);
127
127
  }
128
128
 
129
- shared_ptr<BlockHandle> block;
130
- Allocate(MemoryTag::IN_MEMORY_TABLE, size, false, &block);
131
- return block;
129
+ auto buffer_handle = Allocate(MemoryTag::IN_MEMORY_TABLE, size, false);
130
+ return buffer_handle.GetBlockHandle();
132
131
  }
133
132
 
134
133
  shared_ptr<BlockHandle> StandardBufferManager::RegisterSmallMemory(const idx_t size) {
@@ -164,17 +163,14 @@ shared_ptr<BlockHandle> StandardBufferManager::RegisterMemory(MemoryTag tag, idx
164
163
  destroy_buffer_upon, alloc_size, std::move(res));
165
164
  }
166
165
 
167
- BufferHandle StandardBufferManager::Allocate(MemoryTag tag, idx_t block_size, bool can_destroy,
168
- shared_ptr<BlockHandle> *block) {
169
- shared_ptr<BlockHandle> local_block;
170
- auto block_ptr = block ? block : &local_block;
171
- *block_ptr = RegisterMemory(tag, block_size, can_destroy);
166
+ BufferHandle StandardBufferManager::Allocate(MemoryTag tag, idx_t block_size, bool can_destroy) {
167
+ auto block = RegisterMemory(tag, block_size, can_destroy);
172
168
 
173
169
  #ifdef DUCKDB_DEBUG_DESTROY_BLOCKS
174
170
  // Initialize the memory with garbage data
175
- WriteGarbageIntoBuffer(*(*block_ptr)->buffer);
171
+ WriteGarbageIntoBuffer(*block->buffer);
176
172
  #endif
177
- return Pin(*block_ptr);
173
+ return Pin(block);
178
174
  }
179
175
 
180
176
  void StandardBufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size) {
@@ -253,7 +249,7 @@ void StandardBufferManager::BatchRead(vector<shared_ptr<BlockHandle>> &handles,
253
249
  }
254
250
  auto block_ptr =
255
251
  intermediate_buffer.GetFileBuffer().InternalBuffer() + block_idx * block_manager.GetBlockAllocSize();
256
- buf = BlockHandle::LoadFromBuffer(handle, block_ptr, std::move(reusable_buffer));
252
+ buf = handle->LoadFromBuffer(block_ptr, std::move(reusable_buffer));
257
253
  handle->readers = 1;
258
254
  handle->memory_charge = std::move(reservation);
259
255
  }
@@ -314,7 +310,7 @@ BufferHandle StandardBufferManager::Pin(shared_ptr<BlockHandle> &handle) {
314
310
  if (handle->state == BlockState::BLOCK_LOADED) {
315
311
  // the block is loaded, increment the reader count and set the BufferHandle
316
312
  handle->readers++;
317
- buf = handle->Load(handle);
313
+ buf = handle->Load();
318
314
  }
319
315
  required_memory = handle->memory_usage;
320
316
  }
@@ -335,11 +331,11 @@ BufferHandle StandardBufferManager::Pin(shared_ptr<BlockHandle> &handle) {
335
331
  // the block is loaded, increment the reader count and return a pointer to the handle
336
332
  handle->readers++;
337
333
  reservation.Resize(0);
338
- buf = handle->Load(handle);
334
+ buf = handle->Load();
339
335
  } else {
340
336
  // now we can actually load the current block
341
337
  D_ASSERT(handle->readers == 0);
342
- buf = handle->Load(handle, std::move(reusable_buffer));
338
+ buf = handle->Load(std::move(reusable_buffer));
343
339
  handle->readers = 1;
344
340
  handle->memory_charge = std::move(reservation);
345
341
  // in the case of a variable sized block, the buffer may be smaller than a full block.
@@ -480,33 +476,34 @@ void StandardBufferManager::WriteTemporaryBuffer(MemoryTag tag, block_id_t block
480
476
  buffer.Write(*handle, sizeof(idx_t));
481
477
  }
482
478
 
483
- unique_ptr<FileBuffer> StandardBufferManager::ReadTemporaryBuffer(MemoryTag tag, block_id_t id,
479
+ unique_ptr<FileBuffer> StandardBufferManager::ReadTemporaryBuffer(MemoryTag tag, BlockHandle &block,
484
480
  unique_ptr<FileBuffer> reusable_buffer) {
485
481
  D_ASSERT(!temporary_directory.path.empty());
486
482
  D_ASSERT(temporary_directory.handle.get());
483
+ auto id = block.BlockId();
487
484
  if (temporary_directory.handle->GetTempFile().HasTemporaryBuffer(id)) {
488
- evicted_data_per_tag[uint8_t(tag)] -= GetBlockSize();
485
+ // This is a block that was offloaded to a regular .tmp file, the file contains blocks of a fixed size
489
486
  return temporary_directory.handle->GetTempFile().ReadTemporaryBuffer(id, std::move(reusable_buffer));
490
487
  }
491
488
 
492
- // Open the temporary file and read its size.
489
+ // This block contains data of variable size so we need to open it and read it to get its size.
493
490
  idx_t block_size;
494
491
  auto path = GetTemporaryPath(id);
495
492
  auto &fs = FileSystem::GetFileSystem(db);
496
493
  auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ);
497
494
  handle->Read(&block_size, sizeof(idx_t), 0);
498
- evicted_data_per_tag[uint8_t(tag)] -= block_size;
499
495
 
500
496
  // Allocate a buffer of the file's size and read the data into that buffer.
501
497
  auto buffer = ReadTemporaryBufferInternal(*this, *handle, sizeof(idx_t), block_size, std::move(reusable_buffer));
502
498
  handle.reset();
503
499
 
504
500
  // Delete the file and return the buffer.
505
- DeleteTemporaryFile(id);
501
+ DeleteTemporaryFile(block);
506
502
  return buffer;
507
503
  }
508
504
 
509
- void StandardBufferManager::DeleteTemporaryFile(block_id_t id) {
505
+ void StandardBufferManager::DeleteTemporaryFile(BlockHandle &block) {
506
+ auto id = block.BlockId();
510
507
  if (temporary_directory.path.empty()) {
511
508
  // no temporary directory specified: nothing to delete
512
509
  return;
@@ -520,6 +517,7 @@ void StandardBufferManager::DeleteTemporaryFile(block_id_t id) {
520
517
  }
521
518
  // check if we should delete the file from the shared pool of files, or from the general file system
522
519
  if (temporary_directory.handle->GetTempFile().HasTemporaryBuffer(id)) {
520
+ evicted_data_per_tag[uint8_t(block.GetMemoryTag())] -= GetBlockSize();
523
521
  temporary_directory.handle->GetTempFile().DeleteTemporaryBuffer(id);
524
522
  return;
525
523
  }
@@ -528,6 +526,7 @@ void StandardBufferManager::DeleteTemporaryFile(block_id_t id) {
528
526
  auto &fs = FileSystem::GetFileSystem(db);
529
527
  auto path = GetTemporaryPath(id);
530
528
  if (fs.FileExists(path)) {
529
+ evicted_data_per_tag[uint8_t(block.GetMemoryTag())] -= block.GetMemoryUsage();
531
530
  auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ);
532
531
  auto content_size = handle->GetFileSize();
533
532
  handle.reset();
@@ -20,8 +20,7 @@ shared_ptr<ColumnStatistics> ColumnStatistics::CreateEmptyStats(const LogicalTyp
20
20
 
21
21
  void ColumnStatistics::Merge(ColumnStatistics &other) {
22
22
  stats.Merge(other.stats);
23
- if (distinct_stats) {
24
- D_ASSERT(other.distinct_stats);
23
+ if (distinct_stats && other.distinct_stats) {
25
24
  distinct_stats->Merge(*other.distinct_stats);
26
25
  }
27
26
  }
@@ -92,7 +92,10 @@ void ColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_idx)
92
92
  state.last_offset = 0;
93
93
  }
94
94
 
95
- ScanVectorType ColumnData::GetVectorScanType(ColumnScanState &state, idx_t scan_count) {
95
+ ScanVectorType ColumnData::GetVectorScanType(ColumnScanState &state, idx_t scan_count, Vector &result) {
96
+ if (result.GetVectorType() != VectorType::FLAT_VECTOR) {
97
+ return ScanVectorType::SCAN_ENTIRE_VECTOR;
98
+ }
96
99
  if (HasUpdates()) {
97
100
  // if we have updates we need to merge in the updates
98
101
  // always need to scan flat vectors
@@ -230,7 +233,7 @@ void ColumnData::UpdateInternal(TransactionData transaction, idx_t column_index,
230
233
  template <bool SCAN_COMMITTED, bool ALLOW_UPDATES>
231
234
  idx_t ColumnData::ScanVector(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result,
232
235
  idx_t target_scan) {
233
- auto scan_type = GetVectorScanType(state, target_scan);
236
+ auto scan_type = GetVectorScanType(state, target_scan, result);
234
237
  auto scan_count = ScanVector(state, result, target_scan, scan_type);
235
238
  if (scan_type != ScanVectorType::SCAN_ENTIRE_VECTOR) {
236
239
  // if we are scanning an entire vector we cannot have updates
@@ -161,8 +161,8 @@ void ColumnSegment::Resize(idx_t new_size) {
161
161
 
162
162
  auto &buffer_manager = BufferManager::GetBufferManager(db);
163
163
  auto old_handle = buffer_manager.Pin(block);
164
- shared_ptr<BlockHandle> new_block;
165
- auto new_handle = buffer_manager.Allocate(MemoryTag::IN_MEMORY_TABLE, new_size, false, &new_block);
164
+ auto new_handle = buffer_manager.Allocate(MemoryTag::IN_MEMORY_TABLE, new_size);
165
+ auto new_block = new_handle.GetBlockHandle();
166
166
  memcpy(new_handle.Ptr(), old_handle.Ptr(), segment_size);
167
167
 
168
168
  this->block_id = new_block->BlockId();
@@ -848,9 +848,10 @@ public:
848
848
  if (total_append_count != merge_rows) {
849
849
  throw InternalException("Mismatch in row group count vs verify count in RowGroupCollection::Checkpoint");
850
850
  }
851
- // merging is complete - schedule checkpoint tasks of the target row groups
851
+ // merging is complete - execute checkpoint tasks of the target row groups
852
852
  for (idx_t i = 0; i < target_count; i++) {
853
- collection.ScheduleCheckpointTask(checkpoint_state, segment_idx + i);
853
+ auto checkpoint_task = collection.GetCheckpointTask(checkpoint_state, segment_idx + i);
854
+ checkpoint_task->ExecuteTask();
854
855
  }
855
856
  }
856
857
 
@@ -886,7 +887,7 @@ void RowGroupCollection::InitializeVacuumState(CollectionCheckpointState &checkp
886
887
  }
887
888
 
888
889
  bool RowGroupCollection::ScheduleVacuumTasks(CollectionCheckpointState &checkpoint_state, VacuumState &state,
889
- idx_t segment_idx) {
890
+ idx_t segment_idx, bool schedule_vacuum) {
890
891
  static constexpr const idx_t MAX_MERGE_COUNT = 3;
891
892
 
892
893
  if (!state.can_vacuum_deletes) {
@@ -902,6 +903,9 @@ bool RowGroupCollection::ScheduleVacuumTasks(CollectionCheckpointState &checkpoi
902
903
  D_ASSERT(!checkpoint_state.segments[segment_idx].node);
903
904
  return false;
904
905
  }
906
+ if (!schedule_vacuum) {
907
+ return false;
908
+ }
905
909
  idx_t merge_rows;
906
910
  idx_t next_idx = 0;
907
911
  idx_t merge_count;
@@ -952,9 +956,9 @@ bool RowGroupCollection::ScheduleVacuumTasks(CollectionCheckpointState &checkpoi
952
956
  //===--------------------------------------------------------------------===//
953
957
  // Checkpoint
954
958
  //===--------------------------------------------------------------------===//
955
- void RowGroupCollection::ScheduleCheckpointTask(CollectionCheckpointState &checkpoint_state, idx_t segment_idx) {
956
- auto checkpoint_task = make_uniq<CheckpointTask>(checkpoint_state, segment_idx);
957
- checkpoint_state.executor.ScheduleTask(std::move(checkpoint_task));
959
+ unique_ptr<CheckpointTask> RowGroupCollection::GetCheckpointTask(CollectionCheckpointState &checkpoint_state,
960
+ idx_t segment_idx) {
961
+ return make_uniq<CheckpointTask>(checkpoint_state, segment_idx);
958
962
  }
959
963
 
960
964
  void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &global_stats) {
@@ -970,13 +974,12 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl
970
974
  auto &config = DBConfig::GetConfig(writer.GetDatabase());
971
975
  for (idx_t segment_idx = 0; segment_idx < segments.size(); segment_idx++) {
972
976
  auto &entry = segments[segment_idx];
973
- if (total_vacuum_tasks < config.options.max_vacuum_tasks) {
974
- auto vacuum_tasks = ScheduleVacuumTasks(checkpoint_state, vacuum_state, segment_idx);
975
- if (vacuum_tasks) {
976
- // vacuum tasks were scheduled - don't schedule a checkpoint task yet
977
- total_vacuum_tasks++;
978
- continue;
979
- }
977
+ auto vacuum_tasks = ScheduleVacuumTasks(checkpoint_state, vacuum_state, segment_idx,
978
+ total_vacuum_tasks < config.options.max_vacuum_tasks);
979
+ if (vacuum_tasks) {
980
+ // vacuum tasks were scheduled - don't schedule a checkpoint task yet
981
+ total_vacuum_tasks++;
982
+ continue;
980
983
  }
981
984
  if (!entry.node) {
982
985
  // row group was vacuumed/dropped - skip
@@ -984,7 +987,8 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl
984
987
  }
985
988
  // schedule a checkpoint task for this row group
986
989
  entry.node->MoveToCollection(*this, vacuum_state.row_start);
987
- ScheduleCheckpointTask(checkpoint_state, segment_idx);
990
+ auto checkpoint_task = GetCheckpointTask(checkpoint_state, segment_idx);
991
+ checkpoint_state.executor.ScheduleTask(std::move(checkpoint_task));
988
992
  vacuum_state.row_start += entry.node->count;
989
993
  }
990
994
  // all tasks have been scheduled - execute tasks until we are done
@@ -21,16 +21,16 @@ void StandardColumnData::SetStart(idx_t new_start) {
21
21
  validity.SetStart(new_start);
22
22
  }
23
23
 
24
- ScanVectorType StandardColumnData::GetVectorScanType(ColumnScanState &state, idx_t scan_count) {
24
+ ScanVectorType StandardColumnData::GetVectorScanType(ColumnScanState &state, idx_t scan_count, Vector &result) {
25
25
  // if either the current column data, or the validity column data requires flat vectors, we scan flat vectors
26
- auto scan_type = ColumnData::GetVectorScanType(state, scan_count);
26
+ auto scan_type = ColumnData::GetVectorScanType(state, scan_count, result);
27
27
  if (scan_type == ScanVectorType::SCAN_FLAT_VECTOR) {
28
28
  return ScanVectorType::SCAN_FLAT_VECTOR;
29
29
  }
30
30
  if (state.child_states.empty()) {
31
31
  return scan_type;
32
32
  }
33
- return validity.GetVectorScanType(state.child_states[0], scan_count);
33
+ return validity.GetVectorScanType(state.child_states[0], scan_count, result);
34
34
  }
35
35
 
36
36
  void StandardColumnData::InitializePrefetch(PrefetchState &prefetch_state, ColumnScanState &scan_state, idx_t rows) {
@@ -554,9 +554,8 @@ void WriteAheadLogDeserializer::ReplayCreateIndex() {
554
554
  for (idx_t j = 0; j < data_info.allocation_sizes.size(); j++) {
555
555
 
556
556
  // read the data into a buffer handle
557
- shared_ptr<BlockHandle> block_handle;
558
- buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager->GetBlockSize(), false, &block_handle);
559
- auto buffer_handle = buffer_manager.Pin(block_handle);
557
+ auto buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager->GetBlockSize(), false);
558
+ auto block_handle = buffer_handle.GetBlockHandle();
560
559
  auto data_ptr = buffer_handle.Ptr();
561
560
 
562
561
  list.ReadElement<bool>(data_ptr, data_info.allocation_sizes[j]);
@@ -14,6 +14,7 @@
14
14
  #pragma once
15
15
 
16
16
  #include <cstdint>
17
+ #include "pg_simplified_token.hpp"
17
18
 
18
19
  /* Keyword categories --- should match lists in gram.y */
19
20
  #define UNRESERVED_KEYWORD 0
@@ -1287,6 +1287,7 @@ typedef struct PGSelectStmt {
1287
1287
  */
1288
1288
  PGSetOperation op; /* type of set op */
1289
1289
  bool all; /* ALL specified? */
1290
+ bool from_first; /* FROM first or SELECT first */
1290
1291
  struct PGNode *larg; /* left child */
1291
1292
  struct PGNode *rarg; /* right child */
1292
1293
  /* Eventually add fields for CORRESPONDING spec here */
@@ -17,7 +17,6 @@
17
17
  #include "nodes/parsenodes.hpp"
18
18
  #include "pg_simplified_token.hpp"
19
19
  #include <vector>
20
-
21
20
  namespace duckdb_libpgquery {
22
21
 
23
22
  typedef enum PGBackslashQuoteType {
@@ -29,7 +28,7 @@ typedef enum PGBackslashQuoteType {
29
28
  /* Primary entry point for the raw parsing functions */
30
29
  PGList *raw_parser(const char *str);
31
30
 
32
- bool is_keyword(const char *str);
31
+ PGKeywordCategory is_keyword(const char *str);
33
32
  std::vector<PGKeyword> keyword_list();
34
33
 
35
34
  std::vector<PGSimplifiedToken> tokenize(const char *str);
@@ -20,12 +20,14 @@ struct PGSimplifiedToken {
20
20
  };
21
21
 
22
22
  enum class PGKeywordCategory : uint8_t {
23
- PG_KEYWORD_RESERVED,
24
- PG_KEYWORD_UNRESERVED,
25
- PG_KEYWORD_TYPE_FUNC,
26
- PG_KEYWORD_COL_NAME
23
+ PG_KEYWORD_UNRESERVED = 0,
24
+ PG_KEYWORD_COL_NAME = 1,
25
+ PG_KEYWORD_TYPE_FUNC= 2,
26
+ PG_KEYWORD_RESERVED = 3,
27
+ PG_KEYWORD_NONE = 4
27
28
  };
28
29
 
30
+
29
31
  struct PGKeyword {
30
32
  std::string text;
31
33
  PGKeywordCategory category;
@@ -28,7 +28,7 @@ public:
28
28
  void Parse(const std::string &query);
29
29
  static duckdb::vector<duckdb_libpgquery::PGSimplifiedToken> Tokenize(const std::string &query);
30
30
 
31
- static bool IsKeyword(const std::string &text);
31
+ static duckdb_libpgquery::PGKeywordCategory IsKeyword(const std::string &text);
32
32
  static duckdb::vector<duckdb_libpgquery::PGKeyword> KeywordList();
33
33
 
34
34
  static void SetPreserveIdentifierCase(bool downcase);
@@ -34,7 +34,7 @@ PostgresParser::~PostgresParser() {
34
34
  duckdb_libpgquery::pg_parser_cleanup();
35
35
  }
36
36
 
37
- bool PostgresParser::IsKeyword(const std::string &text) {
37
+ duckdb_libpgquery::PGKeywordCategory PostgresParser::IsKeyword(const std::string &text) {
38
38
  return duckdb_libpgquery::is_keyword(text.c_str());
39
39
  }
40
40