duckdb 0.8.2-dev1862.0 → 0.8.2-dev1968.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/binding.gyp +9 -9
  2. package/package.json +1 -1
  3. package/src/duckdb/src/common/enum_util.cpp +28 -0
  4. package/src/duckdb/src/common/types/hugeint.cpp +40 -0
  5. package/src/duckdb/src/core_functions/function_list.cpp +1 -0
  6. package/src/duckdb/src/core_functions/scalar/string/to_base.cpp +66 -0
  7. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +14 -11
  8. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +6 -4
  9. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +14 -12
  10. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +6 -4
  11. package/src/duckdb/src/execution/operator/helper/physical_batch_collector.cpp +8 -6
  12. package/src/duckdb/src/execution/operator/helper/physical_explain_analyze.cpp +2 -2
  13. package/src/duckdb/src/execution/operator/helper/physical_limit.cpp +5 -3
  14. package/src/duckdb/src/execution/operator/helper/physical_materialized_collector.cpp +7 -5
  15. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +7 -5
  16. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +5 -4
  17. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -2
  18. package/src/duckdb/src/execution/operator/join/physical_delim_join.cpp +13 -6
  19. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +7 -5
  20. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -5
  21. package/src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp +7 -4
  22. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +8 -6
  23. package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -5
  24. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +7 -5
  25. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +8 -6
  26. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +8 -7
  27. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +8 -6
  28. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +11 -9
  29. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +10 -10
  30. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +4 -2
  31. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +7 -6
  32. package/src/duckdb/src/execution/physical_operator.cpp +3 -2
  33. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  34. package/src/duckdb/src/include/duckdb/common/bitpacking.hpp +70 -55
  35. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  36. package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +5 -1
  37. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +15 -0
  38. package/src/duckdb/src/include/duckdb/common/limits.hpp +52 -149
  39. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +48 -0
  40. package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/core_functions/scalar/operators_functions.hpp +1 -1
  42. package/src/duckdb/src/include/duckdb/core_functions/scalar/string_functions.hpp +9 -0
  43. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +3 -3
  44. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_perfecthash_aggregate.hpp +1 -1
  45. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +3 -3
  46. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +2 -2
  47. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
  48. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_explain_analyze.hpp +1 -1
  49. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_materialized_collector.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_vacuum.hpp +2 -2
  52. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +2 -2
  53. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_blockwise_nl_join.hpp +1 -1
  54. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_delim_join.hpp +2 -2
  55. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +2 -2
  56. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +2 -2
  57. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp +2 -2
  58. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_piecewise_merge_join.hpp +2 -2
  59. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +2 -2
  61. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
  62. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  63. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -2
  64. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +2 -2
  66. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +1 -1
  67. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +2 -2
  68. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +3 -3
  69. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +11 -0
  70. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +3 -0
  71. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -3
  72. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +3 -2
  73. package/src/duckdb/src/main/relation/join_relation.cpp +1 -1
  74. package/src/duckdb/src/parallel/pipeline.cpp +0 -17
  75. package/src/duckdb/src/parallel/pipeline_executor.cpp +26 -7
  76. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +55 -1
  77. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +13 -3
  78. package/src/duckdb/src/storage/compression/bitpacking.cpp +87 -63
  79. package/src/duckdb/src/storage/compression/bitpacking_hugeint.cpp +295 -0
  80. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +6 -6
  81. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +2 -0
  82. package/src/duckdb/ub_src_storage_compression.cpp +2 -0
@@ -191,17 +191,20 @@ SinkResultType PhysicalNestedLoopJoin::Sink(ExecutionContext &context, DataChunk
     return SinkResultType::NEED_MORE_INPUT;
 }
 
-void PhysicalNestedLoopJoin::Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const {
-    auto &state = lstate.Cast<NestedLoopJoinLocalState>();
+SinkCombineResultType PhysicalNestedLoopJoin::Combine(ExecutionContext &context,
+                                                      OperatorSinkCombineInput &input) const {
+    auto &state = input.local_state.Cast<NestedLoopJoinLocalState>();
     auto &client_profiler = QueryProfiler::Get(context.client);
 
     context.thread.profiler.Flush(*this, state.rhs_executor, "rhs_executor", 1);
     client_profiler.Flush(context.thread.profiler);
+
+    return SinkCombineResultType::FINISHED;
 }
 
 SinkFinalizeType PhysicalNestedLoopJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                                  GlobalSinkState &gstate_p) const {
-    auto &gstate = gstate_p.Cast<NestedLoopJoinGlobalState>();
+                                                  OperatorSinkFinalizeInput &input) const {
+    auto &gstate = input.global_state.Cast<NestedLoopJoinGlobalState>();
     gstate.right_outer.Initialize(gstate.right_payload_data.Count());
     if (gstate.right_payload_data.Count() == 0 && EmptyResultIfRHSIsEmpty()) {
         return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
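
Note: this hunk shows the sink-interface migration that repeats, nearly verbatim, across the operator files below. Combine no longer returns void and no longer takes the global and local sink states as separate parameters; it receives an OperatorSinkCombineInput bundle and must return a SinkCombineResultType. Finalize similarly takes an OperatorSinkFinalizeInput. A minimal sketch of the new shape, using a hypothetical PhysicalMyOperator (the global_state/local_state member names are taken from the hunks in this diff; MyGlobalSinkState, MyLocalSinkState, Merge, and HasResults are placeholders):

    // Hypothetical port of a custom sink operator to the new interface.
    SinkCombineResultType PhysicalMyOperator::Combine(ExecutionContext &context,
                                                      OperatorSinkCombineInput &input) const {
        // the states now arrive bundled in the input argument
        auto &gstate = input.global_state.Cast<MyGlobalSinkState>();
        auto &lstate = input.local_state.Cast<MyLocalSinkState>();
        gstate.Merge(lstate); // merge thread-local results into the global state
        return SinkCombineResultType::FINISHED; // every exit path must now report a result
    }

    SinkFinalizeType PhysicalMyOperator::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
                                                  OperatorSinkFinalizeInput &input) const {
        auto &gstate = input.global_state.Cast<MyGlobalSinkState>();
        return gstate.HasResults() ? SinkFinalizeType::READY : SinkFinalizeType::NO_OUTPUT_POSSIBLE;
    }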
@@ -118,23 +118,25 @@ SinkResultType PhysicalPiecewiseMergeJoin::Sink(ExecutionContext &context, DataC
     return SinkResultType::NEED_MORE_INPUT;
 }
 
-void PhysicalPiecewiseMergeJoin::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
-                                         LocalSinkState &lstate_p) const {
-    auto &gstate = gstate_p.Cast<MergeJoinGlobalState>();
-    auto &lstate = lstate_p.Cast<MergeJoinLocalState>();
+SinkCombineResultType PhysicalPiecewiseMergeJoin::Combine(ExecutionContext &context,
+                                                          OperatorSinkCombineInput &input) const {
+    auto &gstate = input.global_state.Cast<MergeJoinGlobalState>();
+    auto &lstate = input.local_state.Cast<MergeJoinLocalState>();
     gstate.table->Combine(lstate.table);
     auto &client_profiler = QueryProfiler::Get(context.client);
 
     context.thread.profiler.Flush(*this, lstate.table.executor, "rhs_executor", 1);
     client_profiler.Flush(context.thread.profiler);
+
+    return SinkCombineResultType::FINISHED;
 }
 
 //===--------------------------------------------------------------------===//
 // Finalize
 //===--------------------------------------------------------------------===//
 SinkFinalizeType PhysicalPiecewiseMergeJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                                      GlobalSinkState &gstate_p) const {
-    auto &gstate = gstate_p.Cast<MergeJoinGlobalState>();
+                                                      OperatorSinkFinalizeInput &input) const {
+    auto &gstate = input.global_state.Cast<MergeJoinGlobalState>();
     auto &global_sort_state = gstate.table->global_sort_state;
 
     if (IsRightOuterJoin(join_type)) {
@@ -101,10 +101,12 @@ SinkResultType PhysicalOrder::Sink(ExecutionContext &context, DataChunk &chunk,
     return SinkResultType::NEED_MORE_INPUT;
 }
 
-void PhysicalOrder::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
-    auto &gstate = gstate_p.Cast<OrderGlobalSinkState>();
-    auto &lstate = lstate_p.Cast<OrderLocalSinkState>();
+SinkCombineResultType PhysicalOrder::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
+    auto &gstate = input.global_state.Cast<OrderGlobalSinkState>();
+    auto &lstate = input.local_state.Cast<OrderLocalSinkState>();
     gstate.global_sort_state.AddLocalState(lstate.local_sort_state);
+
+    return SinkCombineResultType::FINISHED;
 }
 
 class PhysicalOrderMergeTask : public ExecutorTask {
@@ -163,8 +165,8 @@ public:
 };
 
 SinkFinalizeType PhysicalOrder::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                         GlobalSinkState &gstate_p) const {
-    auto &state = gstate_p.Cast<OrderGlobalSinkState>();
+                                         OperatorSinkFinalizeInput &input) const {
+    auto &state = input.global_state.Cast<OrderGlobalSinkState>();
     auto &global_sort_state = state.global_sort_state;
 
     if (global_sort_state.sorted_blocks.empty()) {
@@ -446,21 +446,23 @@ SinkResultType PhysicalTopN::Sink(ExecutionContext &context, DataChunk &chunk, O
 //===--------------------------------------------------------------------===//
 // Combine
 //===--------------------------------------------------------------------===//
-void PhysicalTopN::Combine(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p) const {
-    auto &gstate = state.Cast<TopNGlobalState>();
-    auto &lstate = lstate_p.Cast<TopNLocalState>();
+SinkCombineResultType PhysicalTopN::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
+    auto &gstate = input.global_state.Cast<TopNGlobalState>();
+    auto &lstate = input.local_state.Cast<TopNLocalState>();
 
     // scan the local top N and append it to the global heap
     lock_guard<mutex> glock(gstate.lock);
     gstate.heap.Combine(lstate.heap);
+
+    return SinkCombineResultType::FINISHED;
 }
 
 //===--------------------------------------------------------------------===//
 // Finalize
 //===--------------------------------------------------------------------===//
 SinkFinalizeType PhysicalTopN::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                        GlobalSinkState &gstate_p) const {
-    auto &gstate = gstate_p.Cast<TopNGlobalState>();
+                                        OperatorSinkFinalizeInput &input) const {
+    auto &gstate = input.global_state.Cast<TopNGlobalState>();
     // global finalize: compute the final top N
     gstate.heap.Finalize();
     return SinkFinalizeType::READY;
@@ -89,11 +89,13 @@ SinkResultType PhysicalBatchCopyToFile::Sink(ExecutionContext &context, DataChun
     return SinkResultType::NEED_MORE_INPUT;
 }
 
-void PhysicalBatchCopyToFile::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
-                                      LocalSinkState &lstate) const {
-    auto &state = lstate.Cast<BatchCopyToLocalState>();
-    auto &gstate = gstate_p.Cast<BatchCopyToGlobalState>();
+SinkCombineResultType PhysicalBatchCopyToFile::Combine(ExecutionContext &context,
+                                                       OperatorSinkCombineInput &input) const {
+    auto &state = input.local_state.Cast<BatchCopyToLocalState>();
+    auto &gstate = input.global_state.Cast<BatchCopyToGlobalState>();
     gstate.rows_copied += state.rows_copied;
+
+    return SinkCombineResultType::FINISHED;
 }
 
 //===--------------------------------------------------------------------===//
@@ -114,8 +116,8 @@ SinkFinalizeType PhysicalBatchCopyToFile::FinalFlush(ClientContext &context, Glo
 }
 
 SinkFinalizeType PhysicalBatchCopyToFile::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                                   GlobalSinkState &gstate_p) const {
-    FinalFlush(context, gstate_p);
+                                                   OperatorSinkFinalizeInput &input) const {
+    FinalFlush(context, input.global_state);
     return SinkFinalizeType::READY;
 }
 
@@ -344,16 +344,15 @@ SinkResultType PhysicalBatchInsert::Sink(ExecutionContext &context, DataChunk &c
     return SinkResultType::NEED_MORE_INPUT;
 }
 
-void PhysicalBatchInsert::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
-                                  LocalSinkState &lstate_p) const {
-    auto &gstate = gstate_p.Cast<BatchInsertGlobalState>();
-    auto &lstate = lstate_p.Cast<BatchInsertLocalState>();
+SinkCombineResultType PhysicalBatchInsert::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
+    auto &gstate = input.global_state.Cast<BatchInsertGlobalState>();
+    auto &lstate = input.local_state.Cast<BatchInsertLocalState>();
     auto &client_profiler = QueryProfiler::Get(context.client);
     context.thread.profiler.Flush(*this, lstate.default_executor, "default_executor", 1);
     client_profiler.Flush(context.thread.profiler);
 
     if (!lstate.current_collection) {
-        return;
+        return SinkCombineResultType::FINISHED;
     }
 
     if (lstate.current_collection->GetTotalRows() > 0) {
@@ -366,11 +365,13 @@ void PhysicalBatchInsert::Combine(ExecutionContext &context, GlobalSinkState &gs
         lock_guard<mutex> l(gstate.lock);
         gstate.table.GetStorage().FinalizeOptimisticWriter(context.client, *lstate.writer);
     }
+
+    return SinkCombineResultType::FINISHED;
 }
 
 SinkFinalizeType PhysicalBatchInsert::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                               GlobalSinkState &gstate_p) const {
-    auto &gstate = gstate_p.Cast<BatchInsertGlobalState>();
+                                               OperatorSinkFinalizeInput &input) const {
+    auto &gstate = input.global_state.Cast<BatchInsertGlobalState>();
 
     // in the finalize, do a final pass over all of the collections we created and try to merge smaller collections
     // together
@@ -97,9 +97,9 @@ static string CreateDirRecursive(const vector<idx_t> &cols, const vector<string>
     return path;
 }
 
-void PhysicalCopyToFile::Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const {
-    auto &g = gstate.Cast<CopyToFunctionGlobalState>();
-    auto &l = lstate.Cast<CopyToFunctionLocalState>();
+SinkCombineResultType PhysicalCopyToFile::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
+    auto &g = input.global_state.Cast<CopyToFunctionGlobalState>();
+    auto &l = input.local_state.Cast<CopyToFunctionLocalState>();
 
     if (partition_output) {
         auto &fs = FileSystem::GetFileSystem(context.client);
@@ -130,7 +130,7 @@ void PhysicalCopyToFile::Combine(ExecutionContext &context, GlobalSinkState &gst
             function.copy_to_finalize(context.client, *bind_data, *fun_data_global);
         }
 
-        return;
+        return SinkCombineResultType::FINISHED;
     }
 
     if (function.copy_to_combine) {
@@ -141,11 +141,13 @@ void PhysicalCopyToFile::Combine(ExecutionContext &context, GlobalSinkState &gst
             function.copy_to_finalize(context.client, *bind_data, *l.global_state);
         }
     }
+
+    return SinkCombineResultType::FINISHED;
 }
 
 SinkFinalizeType PhysicalCopyToFile::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                              GlobalSinkState &gstate_p) const {
-    auto &gstate = gstate_p.Cast<CopyToFunctionGlobalState>();
+                                              OperatorSinkFinalizeInput &input) const {
+    auto &gstate = input.global_state.Cast<CopyToFunctionGlobalState>();
     if (per_thread_output || partition_output) {
         // already happened in combine
         return SinkFinalizeType::READY;
@@ -136,10 +136,10 @@ SinkResultType PhysicalFixedBatchCopy::Sink(ExecutionContext &context, DataChunk
     return SinkResultType::NEED_MORE_INPUT;
 }
 
-void PhysicalFixedBatchCopy::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
-                                     LocalSinkState &lstate) const {
-    auto &state = lstate.Cast<FixedBatchCopyLocalState>();
-    auto &gstate = gstate_p.Cast<FixedBatchCopyGlobalState>();
+SinkCombineResultType PhysicalFixedBatchCopy::Combine(ExecutionContext &context,
+                                                      OperatorSinkCombineInput &input) const {
+    auto &state = input.local_state.Cast<FixedBatchCopyLocalState>();
+    auto &gstate = input.global_state.Cast<FixedBatchCopyGlobalState>();
     gstate.rows_copied += state.rows_copied;
     if (!gstate.any_finished) {
         // signal that this thread is finished processing batches and that we should move on to Finalize
@@ -147,6 +147,8 @@ void PhysicalFixedBatchCopy::Combine(ExecutionContext &context, GlobalSinkState
         gstate.any_finished = true;
     }
     ExecuteTasks(context.client, gstate);
+
+    return SinkCombineResultType::FINISHED;
 }
 
 //===--------------------------------------------------------------------===//
@@ -225,16 +227,16 @@ SinkFinalizeType PhysicalFixedBatchCopy::FinalFlush(ClientContext &context, Glob
 }
 
 SinkFinalizeType PhysicalFixedBatchCopy::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                                  GlobalSinkState &gstate_p) const {
-    auto &gstate = gstate_p.Cast<FixedBatchCopyGlobalState>();
+                                                  OperatorSinkFinalizeInput &input) const {
+    auto &gstate = input.global_state.Cast<FixedBatchCopyGlobalState>();
     idx_t min_batch_index = idx_t(NumericLimits<int64_t>::Maximum());
     // repartition any remaining batches
-    RepartitionBatches(context, gstate_p, min_batch_index, true);
+    RepartitionBatches(context, input.global_state, min_batch_index, true);
     // check if we have multiple tasks to execute
     if (gstate.TaskCount() <= 1) {
         // we don't - just execute the remaining task and finish flushing to disk
-        ExecuteTasks(context, gstate_p);
-        FinalFlush(context, gstate_p);
+        ExecuteTasks(context, input.global_state);
+        FinalFlush(context, input.global_state);
         return SinkFinalizeType::READY;
     }
     // we have multiple tasks remaining - launch an event to execute the tasks in parallel
@@ -463,19 +463,17 @@ SinkResultType PhysicalInsert::Sink(ExecutionContext &context, DataChunk &chunk,
     return SinkResultType::NEED_MORE_INPUT;
 }
 
-void PhysicalInsert::Combine(ExecutionContext &context, GlobalSinkState &gstate_p, LocalSinkState &lstate_p) const {
-    auto &gstate = gstate_p.Cast<InsertGlobalState>();
-    auto &lstate = lstate_p.Cast<InsertLocalState>();
+SinkCombineResultType PhysicalInsert::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
+    auto &gstate = input.global_state.Cast<InsertGlobalState>();
+    auto &lstate = input.local_state.Cast<InsertLocalState>();
     auto &client_profiler = QueryProfiler::Get(context.client);
     context.thread.profiler.Flush(*this, lstate.default_executor, "default_executor", 1);
     client_profiler.Flush(context.thread.profiler);
 
-    if (!parallel) {
-        return;
-    }
-    if (!lstate.local_collection) {
-        return;
+    if (!parallel || !lstate.local_collection) {
+        return SinkCombineResultType::FINISHED;
     }
+
     // parallel append: finalize the append
     TransactionData tdata(0, 0);
     lstate.local_collection->FinalizeAppend(tdata, lstate.local_append_state);
@@ -500,11 +498,13 @@ void PhysicalInsert::Combine(ExecutionContext &context, GlobalSinkState &gstate_
         gstate.table.GetStorage().FinalizeOptimisticWriter(context.client, *lstate.writer);
         gstate.table.GetStorage().LocalMerge(context.client, *lstate.local_collection);
     }
+
+    return SinkCombineResultType::FINISHED;
 }
 
 SinkFinalizeType PhysicalInsert::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                          GlobalSinkState &state) const {
-    auto &gstate = state.Cast<InsertGlobalState>();
+                                          OperatorSinkFinalizeInput &input) const {
+    auto &gstate = input.global_state.Cast<InsertGlobalState>();
     if (!parallel && gstate.initialized) {
         auto &table = gstate.table;
         auto &storage = table.GetStorage();
@@ -140,11 +140,13 @@ unique_ptr<LocalSinkState> PhysicalUpdate::GetLocalSinkState(ExecutionContext &c
     return make_uniq<UpdateLocalState>(context.client, expressions, table.GetTypes(), bound_defaults);
 }
 
-void PhysicalUpdate::Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const {
-    auto &state = lstate.Cast<UpdateLocalState>();
+SinkCombineResultType PhysicalUpdate::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
+    auto &state = input.local_state.Cast<UpdateLocalState>();
     auto &client_profiler = QueryProfiler::Get(context.client);
     context.thread.profiler.Flush(*this, state.default_executor, "default_executor", 1);
     client_profiler.Flush(context.thread.profiler);
+
+    return SinkCombineResultType::FINISHED;
 }
 
 //===--------------------------------------------------------------------===//
@@ -123,11 +123,10 @@ SinkResultType PhysicalCreateIndex::Sink(ExecutionContext &context, DataChunk &c
     return SinkResultType::NEED_MORE_INPUT;
 }
 
-void PhysicalCreateIndex::Combine(ExecutionContext &context, GlobalSinkState &gstate_p,
-                                  LocalSinkState &lstate_p) const {
+SinkCombineResultType PhysicalCreateIndex::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
 
-    auto &gstate = gstate_p.Cast<CreateIndexGlobalSinkState>();
-    auto &lstate = lstate_p.Cast<CreateIndexLocalSinkState>();
+    auto &gstate = input.global_state.Cast<CreateIndexGlobalSinkState>();
+    auto &lstate = input.local_state.Cast<CreateIndexLocalSinkState>();
 
     // merge the local index into the global index
     if (!gstate.global_index->MergeIndexes(*lstate.local_index)) {
@@ -136,14 +135,16 @@ void PhysicalCreateIndex::Combine(ExecutionContext &context, GlobalSinkState &gs
 
     // vacuum excess memory
     gstate.global_index->Vacuum();
+
+    return SinkCombineResultType::FINISHED;
 }
 
 SinkFinalizeType PhysicalCreateIndex::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                               GlobalSinkState &gstate_p) const {
+                                               OperatorSinkFinalizeInput &input) const {
 
     // here, we just set the resulting global index as the newly created index of the table
 
-    auto &state = gstate_p.Cast<CreateIndexGlobalSinkState>();
+    auto &state = input.global_state.Cast<CreateIndexGlobalSinkState>();
     D_ASSERT(!state.global_index->VerifyAndToString(true).empty());
 
     auto &storage = table.GetStorage();
@@ -97,11 +97,12 @@ SinkResultType PhysicalOperator::Sink(ExecutionContext &context, DataChunk &chun
 
 // LCOV_EXCL_STOP
 
-void PhysicalOperator::Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const {
+SinkCombineResultType PhysicalOperator::Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const {
+    return SinkCombineResultType::FINISHED;
 }
 
 SinkFinalizeType PhysicalOperator::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
-                                            GlobalSinkState &gstate) const {
+                                            OperatorSinkFinalizeInput &input) const {
     return SinkFinalizeType::READY;
 }
 
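
Note: the base-class defaults above mean a sink with no per-thread state to merge no longer needs its own Combine or Finalize; the inherited versions simply report FINISHED and READY. The widened return type (see the +5/-1 change to common/enums/operator_result_type.hpp in the file list) presumably leaves room for results other than FINISHED, but those variants are not visible in this diff. A hedged, illustrative fragment only:

    // Hypothetical sink relying on the new defaults (fragment, not a complete class).
    class PhysicalPassthroughSink : public PhysicalOperator {
        // no Combine()/Finalize() overrides required: the inherited
        // implementations return SinkCombineResultType::FINISHED and
        // SinkFinalizeType::READY.
    };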
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-dev1862"
+#define DUCKDB_VERSION "0.8.2-dev1968"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "9b0a6350ab"
+#define DUCKDB_SOURCE_ID "b1d5e20f31"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
@@ -13,11 +13,17 @@
 #include "duckdb/common/exception.hpp"
 #include "duckdb/common/helper.hpp"
 #include "duckdb/common/limits.hpp"
+#include "duckdb/common/numeric_utils.hpp"
 
 namespace duckdb {
 
 using bitpacking_width_t = uint8_t;
 
+struct HugeIntPacker {
+    static void Pack(const hugeint_t *__restrict in, uint32_t *__restrict out, bitpacking_width_t width);
+    static void Unpack(const uint32_t *__restrict in, hugeint_t *__restrict out, bitpacking_width_t width);
+};
+
 class BitpackingPrimitives {
 
 public:
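
Note: HugeIntPacker is the declaration behind the new bitpacking_hugeint.cpp translation unit (file 79 above, +295 lines, not shown in this diff). Its Pack/Unpack operate on uint32_t lanes: a hugeint_t is 128 bits wide, so the packer reads and writes the bit stream through 32-bit words. A self-contained, naive reference of that idea (bit order and group handling are assumptions; hugeint128 stands in for duckdb's hugeint_t):

    #include <cstddef>
    #include <cstdint>

    struct hugeint128 { // stand-in for duckdb's hugeint_t (lower/upper halves)
        uint64_t lower;
        int64_t upper;
    };

    // Naive bit-at-a-time packer: each of `count` values contributes exactly
    // `width` bits, least-significant bit first, into `out`, which the caller
    // must zero-initialize.
    void NaiveHugeintPack(const hugeint128 *in, uint32_t *out, size_t count, uint8_t width) {
        size_t bit_pos = 0; // absolute bit cursor into the 32-bit lane stream
        for (size_t i = 0; i < count; i++, bit_pos += width) {
            for (uint8_t b = 0; b < width; b++) {
                // extract bit b of the 128-bit value
                uint64_t bit = (b < 64) ? (in[i].lower >> b) & 1
                                        : (uint64_t(in[i].upper) >> (b - 64)) & 1;
                out[(bit_pos + b) / 32] |= uint32_t(bit) << ((bit_pos + b) % 32);
            }
        }
    }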
@@ -37,9 +43,7 @@ public:
         idx_t misaligned_count = count % BITPACKING_ALGORITHM_GROUP_SIZE;
         T tmp_buffer[BITPACKING_ALGORITHM_GROUP_SIZE]; // TODO maybe faster on the heap?
 
-        if (misaligned_count) {
-            count -= misaligned_count;
-        }
+        count -= misaligned_count;
 
         for (idx_t i = 0; i < count; i += BITPACKING_ALGORITHM_GROUP_SIZE) {
             PackGroup<T>(dst + (i * width) / 8, src + i, width);
@@ -78,22 +82,22 @@ public:
     }
 
     // Calculates the minimum required number of bits per value that can store all values
-    template <class T>
+    template <class T, bool is_signed = NumericLimits<T>::IsSigned()>
     inline static bitpacking_width_t MinimumBitWidth(T value) {
-        return FindMinimumBitWidth<T, BYTE_ALIGNED>(value, value);
+        return FindMinimumBitWidth<T, is_signed, BYTE_ALIGNED>(value, value);
     }
 
     // Calculates the minimum required number of bits per value that can store all values
-    template <class T>
+    template <class T, bool is_signed = NumericLimits<T>::IsSigned()>
     inline static bitpacking_width_t MinimumBitWidth(T *values, idx_t count) {
-        return FindMinimumBitWidth<T, BYTE_ALIGNED>(values, count);
+        return FindMinimumBitWidth<T, is_signed, BYTE_ALIGNED>(values, count);
     }
 
     // Calculates the minimum required number of bits per value that can store all values,
     // given a predetermined minimum and maximum value of the buffer
-    template <class T>
+    template <class T, bool is_signed = NumericLimits<T>::IsSigned()>
     inline static bitpacking_width_t MinimumBitWidth(T minimum, T maximum) {
-        return FindMinimumBitWidth<T, BYTE_ALIGNED>(minimum, maximum);
+        return FindMinimumBitWidth<T, is_signed, BYTE_ALIGNED>(minimum, maximum);
     }
 
     inline static idx_t GetRequiredSize(idx_t count, bitpacking_width_t width) {
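
Note: the new is_signed template parameter defaults to NumericLimits<T>::IsSigned() instead of the std::is_signed checks the implementation used internally (replaced in the hunks below). A plausible motivation, given the hugeint support added elsewhere in this diff: std::is_signed is only true for builtin arithmetic types, so it cannot classify a class type such as hugeint_t, whereas DuckDB's own NumericLimits trait can be specialized. A self-contained illustration of the distinction (my_int128 and MyNumericLimits are stand-ins):

    #include <type_traits>

    struct my_int128 { // stand-in for a class-type integer like hugeint_t
        unsigned long long lower;
        long long upper;
    };

    // std::is_signed is false for every non-arithmetic class type:
    static_assert(!std::is_signed<my_int128>::value, "class types are never std::is_signed");

    // ...so signedness must be carried by a library trait that can be specialized:
    template <class T>
    struct MyNumericLimits {
        static constexpr bool IsSigned() { return std::is_signed<T>::value; }
    };
    template <>
    struct MyNumericLimits<my_int128> {
        static constexpr bool IsSigned() { return true; }
    };
    static_assert(MyNumericLimits<my_int128>::IsSigned(), "the trait reports hugeint-style types as signed");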
@@ -112,7 +116,7 @@ public:
     }
 
 private:
-    template <class T, bool round_to_next_byte = false>
+    template <class T, bool is_signed, bool round_to_next_byte = false>
     static bitpacking_width_t FindMinimumBitWidth(T *values, idx_t count) {
         T min_value = values[0];
         T max_value = values[0];
@@ -122,7 +126,7 @@ private:
             max_value = values[i];
         }
 
-        if (std::is_signed<T>::value) {
+        if (is_signed) {
             if (values[i] < min_value) {
                 min_value = values[i];
             }
@@ -132,12 +136,12 @@ private:
         return FindMinimumBitWidth<T, round_to_next_byte>(min_value, max_value);
     }
 
-    template <class T, bool round_to_next_byte = false>
+    template <class T, bool is_signed, bool round_to_next_byte = false>
     static bitpacking_width_t FindMinimumBitWidth(T min_value, T max_value) {
         bitpacking_width_t bitwidth;
         T value;
 
-        if (std::is_signed<T>::value) {
+        if (is_signed) {
             if (min_value == NumericLimits<T>::Minimum()) {
                 // handle special case of the minimal value, as it cannot be negated like all other values.
                 return sizeof(T) * 8;
@@ -152,7 +156,7 @@ private:
             return 0;
         }
 
-        if (std::is_signed<T>::value) {
+        if (is_signed) {
             bitwidth = 1;
         } else {
             bitwidth = 0;
@@ -168,58 +172,37 @@ private:
         // Assert results are correct
 #ifdef DEBUG
         if (bitwidth < sizeof(T) * 8 && bitwidth != 0) {
-            if (std::is_signed<T>::value) {
-                D_ASSERT((int64_t)max_value <= (int64_t)(1L << (bitwidth - 1)) - 1);
-                D_ASSERT((int64_t)min_value >= (int64_t)(-1 * ((1L << (bitwidth - 1)) - 1) - 1));
+            if (is_signed) {
+                D_ASSERT(max_value <= (T(1) << (bitwidth - 1)) - 1);
+                D_ASSERT(min_value >= (T(-1) * ((T(1) << (bitwidth - 1)) - 1) - 1));
             } else {
-                D_ASSERT((uint64_t)max_value <= (uint64_t)(1L << (bitwidth)) - 1);
+                D_ASSERT(max_value <= (T(1) << (bitwidth)) - 1);
             }
         }
 #endif
         if (round_to_next_byte) {
             return (bitwidth / 8 + (bitwidth % 8 != 0)) * 8;
-        } else {
-            return bitwidth;
         }
+        return bitwidth;
     }
 
     // Sign bit extension
-    template <class T, class T_U = typename std::make_unsigned<T>::type>
+    template <class T, class T_U = typename MakeUnsigned<T>::type>
     static void SignExtend(data_ptr_t dst, bitpacking_width_t width) {
-        T const mask = ((T_U)1) << (width - 1);
+        T const mask = T_U(1) << (width - 1);
         for (idx_t i = 0; i < BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE; ++i) {
             T value = Load<T>(dst + i * sizeof(T));
-            value = value & ((((T_U)1) << width) - ((T_U)1));
+            value = value & ((T_U(1) << width) - T_U(1));
             T result = (value ^ mask) - mask;
             Store(result, dst + i * sizeof(T));
         }
     }
 
-    template <class T>
-    static void UnPackGroup(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width,
-                            bool skip_sign_extension = false) {
-        if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) {
-            duckdb_fastpforlib::fastunpack((const uint8_t *)src, (uint8_t *)dst, (uint32_t)width);
-        } else if (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value) {
-            duckdb_fastpforlib::fastunpack((const uint16_t *)src, (uint16_t *)dst, (uint32_t)width);
-        } else if (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value) {
-            duckdb_fastpforlib::fastunpack((const uint32_t *)src, (uint32_t *)dst, (uint32_t)width);
-        } else if (std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value) {
-            duckdb_fastpforlib::fastunpack((const uint32_t *)src, (uint64_t *)dst, (uint32_t)width);
-        } else {
-            throw InternalException("Unsupported type found in bitpacking.");
-        }
-
-        if (NumericLimits<T>::IsSigned() && !skip_sign_extension && width > 0 && width < sizeof(T) * 8) {
-            SignExtend<T>(dst, width);
-        }
-    }
-
     // Prevent compression at widths that are ineffective
     template <class T>
     static bitpacking_width_t GetEffectiveWidth(bitpacking_width_t width) {
-        auto bits_of_type = sizeof(T) * 8;
-        auto type_size = sizeof(T);
+        bitpacking_width_t bits_of_type = sizeof(T) * 8;
+        bitpacking_width_t type_size = sizeof(T);
         if (width + type_size > bits_of_type) {
             return bits_of_type;
         }
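
Note: the SignExtend body above keeps the classic (value ^ mask) - mask trick, now written with T_U(1)-style casts. For a packed width of w bits, mask is 1 << (w - 1); the XOR flips the packed sign bit, and the subtraction propagates it through the upper bits. A self-contained worked example at width 4:

    #include <cassert>
    #include <cstdint>

    int main() {
        const uint8_t width = 4;
        const int8_t mask = int8_t(1) << (width - 1); // 0b1000
        auto sign_extend = [&](int8_t packed) { return int8_t((packed ^ mask) - mask); };
        assert(sign_extend(0b1111) == -1); // top packed bit set: value is negative
        assert(sign_extend(0b0111) == 7);  // top packed bit clear: value unchanged
        return 0;
    }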
@@ -227,17 +210,49 @@ private:
     }
 
     template <class T>
-    static void PackGroup(data_ptr_t dst, T *values, bitpacking_width_t width) {
-        if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) {
-            duckdb_fastpforlib::fastpack((const uint8_t *)values, (uint8_t *)dst, (uint32_t)width);
-        } else if (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value) {
-            duckdb_fastpforlib::fastpack((const uint16_t *)values, (uint16_t *)dst, (uint32_t)width);
-        } else if (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value) {
-            duckdb_fastpforlib::fastpack((const uint32_t *)values, (uint32_t *)dst, (uint32_t)width);
-        } else if (std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value) {
-            duckdb_fastpforlib::fastpack((const uint64_t *)values, (uint32_t *)dst, (uint32_t)width);
+    static inline void PackGroup(data_ptr_t dst, T *values, bitpacking_width_t width) {
+        if (std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value) {
+            duckdb_fastpforlib::fastpack(reinterpret_cast<const uint8_t *>(values), reinterpret_cast<uint8_t *>(dst),
+                                         static_cast<uint32_t>(width));
+        } else if (std::is_same<T, int16_t>::value || std::is_same<T, uint16_t>::value) {
+            duckdb_fastpforlib::fastpack(reinterpret_cast<const uint16_t *>(values), reinterpret_cast<uint16_t *>(dst),
+                                         static_cast<uint32_t>(width));
+        } else if (std::is_same<T, int32_t>::value || std::is_same<T, uint32_t>::value) {
+            duckdb_fastpforlib::fastpack(reinterpret_cast<const uint32_t *>(values), reinterpret_cast<uint32_t *>(dst),
+                                         static_cast<uint32_t>(width));
+        } else if (std::is_same<T, int64_t>::value || std::is_same<T, uint64_t>::value) {
+            duckdb_fastpforlib::fastpack(reinterpret_cast<const uint64_t *>(values), reinterpret_cast<uint32_t *>(dst),
+                                         static_cast<uint32_t>(width));
+        } else if (std::is_same<T, hugeint_t>::value) {
+            HugeIntPacker::Pack(reinterpret_cast<const hugeint_t *>(values), reinterpret_cast<uint32_t *>(dst), width);
         } else {
-            throw InternalException("Unsupported type found in bitpacking.");
+            throw InternalException("Unsupported type for bitpacking");
+        }
+    }
+
+    template <class T>
+    static inline void UnPackGroup(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width,
+                                   bool skip_sign_extension = false) {
+        if (std::is_same<T, int8_t>::value || std::is_same<T, uint8_t>::value) {
+            duckdb_fastpforlib::fastunpack(reinterpret_cast<const uint8_t *>(src), reinterpret_cast<uint8_t *>(dst),
+                                           static_cast<uint32_t>(width));
+        } else if (std::is_same<T, int16_t>::value || std::is_same<T, uint16_t>::value) {
+            duckdb_fastpforlib::fastunpack(reinterpret_cast<const uint16_t *>(src), reinterpret_cast<uint16_t *>(dst),
+                                           static_cast<uint32_t>(width));
+        } else if (std::is_same<T, int32_t>::value || std::is_same<T, uint32_t>::value) {
+            duckdb_fastpforlib::fastunpack(reinterpret_cast<const uint32_t *>(src), reinterpret_cast<uint32_t *>(dst),
+                                           static_cast<uint32_t>(width));
+        } else if (std::is_same<T, int64_t>::value || std::is_same<T, uint64_t>::value) {
+            duckdb_fastpforlib::fastunpack(reinterpret_cast<const uint32_t *>(src), reinterpret_cast<uint64_t *>(dst),
+                                           static_cast<uint32_t>(width));
+        } else if (std::is_same<T, hugeint_t>::value) {
+            HugeIntPacker::Unpack(reinterpret_cast<const uint32_t *>(src), reinterpret_cast<hugeint_t *>(dst), width);
+        } else {
+            throw InternalException("Unsupported type for bitpacking");
+        }
+
+        if (NumericLimits<T>::IsSigned() && !skip_sign_extension && width > 0 && width < sizeof(T) * 8) {
+            SignExtend<T>(dst, width);
         }
     }
 };
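
Note: with hugeint_t routed through PackGroup/UnPackGroup, the public BitpackingPrimitives entry points shown in this header now cover 128-bit values end to end. A hedged usage sketch, using only names visible in the hunks above (LoadValues and the surrounding buffer management are hypothetical):

    // Measure the required width, then size the output buffer (sketch only).
    std::vector<hugeint_t> values = LoadValues(); // hypothetical source of data
    bitpacking_width_t width =
        BitpackingPrimitives::MinimumBitWidth<hugeint_t>(values.data(), values.size());
    idx_t bytes_needed = BitpackingPrimitives::GetRequiredSize(values.size(), width);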