duckdb 0.7.2-dev1034.0 → 0.7.2-dev1138.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  3. package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
  4. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
  5. package/src/duckdb/src/common/serializer/enum_serializer.cpp +6 -6
  6. package/src/duckdb/src/common/sort/comparators.cpp +14 -5
  7. package/src/duckdb/src/common/types/interval.cpp +0 -41
  8. package/src/duckdb/src/common/types/list_segment.cpp +658 -0
  9. package/src/duckdb/src/common/types/string_heap.cpp +1 -1
  10. package/src/duckdb/src/common/types/string_type.cpp +1 -1
  11. package/src/duckdb/src/common/types/vector.cpp +1 -1
  12. package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
  13. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
  14. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
  15. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  16. package/src/duckdb/src/execution/index/art/art.cpp +13 -0
  17. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
  18. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
  19. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +1 -0
  20. package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
  21. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +5 -1
  22. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +18 -5
  23. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
  24. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
  25. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
  26. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
  27. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +0 -4
  28. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +1 -0
  29. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
  30. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
  31. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -0
  32. package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
  33. package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
  34. package/src/duckdb/src/function/table/read_csv.cpp +5 -0
  35. package/src/duckdb/src/function/table/table_scan.cpp +8 -11
  36. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  37. package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
  39. package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
  40. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
  41. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
  42. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
  43. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -12
  44. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +4 -0
  45. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  46. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -0
  47. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +0 -2
  49. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  50. package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +18 -7
  53. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -3
  54. package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
  55. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +0 -1
  56. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +35 -43
  57. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +18 -5
  58. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -4
  59. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -29
  60. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +2 -3
  61. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +11 -1
  62. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
  63. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +4 -1
  64. package/src/duckdb/src/include/duckdb.h +21 -0
  65. package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
  66. package/src/duckdb/src/main/settings/settings.cpp +20 -8
  67. package/src/duckdb/src/optimizer/filter_combiner.cpp +2 -5
  68. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -0
  69. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +1 -0
  70. package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -3
  71. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +22 -0
  72. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
  73. package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
  74. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -1
  75. package/src/duckdb/src/storage/compression/numeric_constant.cpp +1 -1
  76. package/src/duckdb/src/storage/compression/rle.cpp +1 -0
  77. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
  78. package/src/duckdb/src/storage/data_table.cpp +3 -3
  79. package/src/duckdb/src/storage/local_storage.cpp +7 -0
  80. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  81. package/src/duckdb/src/storage/table/column_data.cpp +75 -18
  82. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -1
  83. package/src/duckdb/src/storage/table/column_segment.cpp +17 -31
  84. package/src/duckdb/src/storage/table/list_column_data.cpp +9 -12
  85. package/src/duckdb/src/storage/table/row_group.cpp +200 -136
  86. package/src/duckdb/src/storage/table/row_group_collection.cpp +75 -45
  87. package/src/duckdb/src/storage/table/scan_state.cpp +31 -38
  88. package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -6
  89. package/src/duckdb/src/storage/table/struct_column_data.cpp +11 -18
  90. package/src/duckdb/src/storage/table/update_segment.cpp +3 -0
  91. package/src/duckdb/ub_src_common_types.cpp +2 -0
@@ -3,6 +3,8 @@
3
3
  #include "duckdb/storage/table/update_segment.hpp"
4
4
  #include "duckdb/storage/data_table.hpp"
5
5
  #include "duckdb/parser/column_definition.hpp"
6
+ #include "duckdb/storage/table/scan_state.hpp"
7
+
6
8
  namespace duckdb {
7
9
 
8
10
  ColumnDataCheckpointer::ColumnDataCheckpointer(ColumnData &col_data_p, RowGroup &row_group_p,
@@ -161,7 +163,7 @@ void ColumnDataCheckpointer::WriteToDisk() {
161
163
  // first we check the current segments
162
164
  // if there are any persistent segments, we will mark their old block ids as modified
163
165
  // since the segments will be rewritten their old on disk data is no longer required
164
- auto &block_manager = col_data.block_manager;
166
+ auto &block_manager = col_data.GetBlockManager();
165
167
  for (idx_t segment_idx = 0; segment_idx < nodes.size(); segment_idx++) {
166
168
  auto segment = nodes[segment_idx].node.get();
167
169
  if (segment->segment_type == ColumnSegmentType::PERSISTENT) {
@@ -3,13 +3,12 @@
3
3
  #include "duckdb/storage/table/update_segment.hpp"
4
4
  #include "duckdb/common/types/null_value.hpp"
5
5
  #include "duckdb/common/types/vector.hpp"
6
- #include "duckdb/common/vector_operations/vector_operations.hpp"
7
6
  #include "duckdb/storage/table/append_state.hpp"
8
7
  #include "duckdb/storage/storage_manager.hpp"
9
8
  #include "duckdb/planner/filter/conjunction_filter.hpp"
10
9
  #include "duckdb/planner/filter/constant_filter.hpp"
11
- #include "duckdb/planner/filter/null_filter.hpp"
12
10
  #include "duckdb/main/config.hpp"
11
+ #include "duckdb/storage/table/scan_state.hpp"
13
12
 
14
13
  #include <cstring>
15
14
 
@@ -217,12 +216,12 @@ void ColumnSegment::MarkAsPersistent(shared_ptr<BlockHandle> block_p, uint32_t o
217
216
  // Filter Selection
218
217
  //===--------------------------------------------------------------------===//
219
218
  template <class T, class OP, bool HAS_NULL>
220
- static idx_t TemplatedFilterSelection(T *vec, T *predicate, SelectionVector &sel, idx_t approved_tuple_count,
219
+ static idx_t TemplatedFilterSelection(T *vec, T predicate, SelectionVector &sel, idx_t approved_tuple_count,
221
220
  ValidityMask &mask, SelectionVector &result_sel) {
222
221
  idx_t result_count = 0;
223
222
  for (idx_t i = 0; i < approved_tuple_count; i++) {
224
223
  auto idx = sel.get_index(i);
225
- if ((!HAS_NULL || mask.RowIsValid(idx)) && OP::Operation(vec[idx], *predicate)) {
224
+ if ((!HAS_NULL || mask.RowIsValid(idx)) && OP::Operation(vec[idx], predicate)) {
226
225
  result_sel.set_index(result_count++, idx);
227
226
  }
228
227
  }
@@ -230,7 +229,7 @@ static idx_t TemplatedFilterSelection(T *vec, T *predicate, SelectionVector &sel
230
229
  }
231
230
 
232
231
  template <class T>
233
- static void FilterSelectionSwitch(T *vec, T *predicate, SelectionVector &sel, idx_t &approved_tuple_count,
232
+ static void FilterSelectionSwitch(T *vec, T predicate, SelectionVector &sel, idx_t &approved_tuple_count,
234
233
  ExpressionType comparison_type, ValidityMask &mask) {
235
234
  SelectionVector new_sel(approved_tuple_count);
236
235
  // the inplace loops take the result as the last parameter
@@ -371,104 +370,91 @@ idx_t ColumnSegment::FilterSelection(SelectionVector &sel, Vector &result, const
371
370
  switch (result.GetType().InternalType()) {
372
371
  case PhysicalType::UINT8: {
373
372
  auto result_flat = FlatVector::GetData<uint8_t>(result);
374
- Vector predicate_vector(constant_filter.constant);
375
- auto predicate = FlatVector::GetData<uint8_t>(predicate_vector);
373
+ auto predicate = UTinyIntValue::Get(constant_filter.constant);
376
374
  FilterSelectionSwitch<uint8_t>(result_flat, predicate, sel, approved_tuple_count,
377
375
  constant_filter.comparison_type, mask);
378
376
  break;
379
377
  }
380
378
  case PhysicalType::UINT16: {
381
379
  auto result_flat = FlatVector::GetData<uint16_t>(result);
382
- Vector predicate_vector(constant_filter.constant);
383
- auto predicate = FlatVector::GetData<uint16_t>(predicate_vector);
380
+ auto predicate = USmallIntValue::Get(constant_filter.constant);
384
381
  FilterSelectionSwitch<uint16_t>(result_flat, predicate, sel, approved_tuple_count,
385
382
  constant_filter.comparison_type, mask);
386
383
  break;
387
384
  }
388
385
  case PhysicalType::UINT32: {
389
386
  auto result_flat = FlatVector::GetData<uint32_t>(result);
390
- Vector predicate_vector(constant_filter.constant);
391
- auto predicate = FlatVector::GetData<uint32_t>(predicate_vector);
387
+ auto predicate = UIntegerValue::Get(constant_filter.constant);
392
388
  FilterSelectionSwitch<uint32_t>(result_flat, predicate, sel, approved_tuple_count,
393
389
  constant_filter.comparison_type, mask);
394
390
  break;
395
391
  }
396
392
  case PhysicalType::UINT64: {
397
393
  auto result_flat = FlatVector::GetData<uint64_t>(result);
398
- Vector predicate_vector(constant_filter.constant);
399
- auto predicate = FlatVector::GetData<uint64_t>(predicate_vector);
394
+ auto predicate = UBigIntValue::Get(constant_filter.constant);
400
395
  FilterSelectionSwitch<uint64_t>(result_flat, predicate, sel, approved_tuple_count,
401
396
  constant_filter.comparison_type, mask);
402
397
  break;
403
398
  }
404
399
  case PhysicalType::INT8: {
405
400
  auto result_flat = FlatVector::GetData<int8_t>(result);
406
- Vector predicate_vector(constant_filter.constant);
407
- auto predicate = FlatVector::GetData<int8_t>(predicate_vector);
401
+ auto predicate = TinyIntValue::Get(constant_filter.constant);
408
402
  FilterSelectionSwitch<int8_t>(result_flat, predicate, sel, approved_tuple_count,
409
403
  constant_filter.comparison_type, mask);
410
404
  break;
411
405
  }
412
406
  case PhysicalType::INT16: {
413
407
  auto result_flat = FlatVector::GetData<int16_t>(result);
414
- Vector predicate_vector(constant_filter.constant);
415
- auto predicate = FlatVector::GetData<int16_t>(predicate_vector);
408
+ auto predicate = SmallIntValue::Get(constant_filter.constant);
416
409
  FilterSelectionSwitch<int16_t>(result_flat, predicate, sel, approved_tuple_count,
417
410
  constant_filter.comparison_type, mask);
418
411
  break;
419
412
  }
420
413
  case PhysicalType::INT32: {
421
414
  auto result_flat = FlatVector::GetData<int32_t>(result);
422
- Vector predicate_vector(constant_filter.constant);
423
- auto predicate = FlatVector::GetData<int32_t>(predicate_vector);
415
+ auto predicate = IntegerValue::Get(constant_filter.constant);
424
416
  FilterSelectionSwitch<int32_t>(result_flat, predicate, sel, approved_tuple_count,
425
417
  constant_filter.comparison_type, mask);
426
418
  break;
427
419
  }
428
420
  case PhysicalType::INT64: {
429
421
  auto result_flat = FlatVector::GetData<int64_t>(result);
430
- Vector predicate_vector(constant_filter.constant);
431
- auto predicate = FlatVector::GetData<int64_t>(predicate_vector);
422
+ auto predicate = BigIntValue::Get(constant_filter.constant);
432
423
  FilterSelectionSwitch<int64_t>(result_flat, predicate, sel, approved_tuple_count,
433
424
  constant_filter.comparison_type, mask);
434
425
  break;
435
426
  }
436
427
  case PhysicalType::INT128: {
437
428
  auto result_flat = FlatVector::GetData<hugeint_t>(result);
438
- Vector predicate_vector(constant_filter.constant);
439
- auto predicate = FlatVector::GetData<hugeint_t>(predicate_vector);
429
+ auto predicate = HugeIntValue::Get(constant_filter.constant);
440
430
  FilterSelectionSwitch<hugeint_t>(result_flat, predicate, sel, approved_tuple_count,
441
431
  constant_filter.comparison_type, mask);
442
432
  break;
443
433
  }
444
434
  case PhysicalType::FLOAT: {
445
435
  auto result_flat = FlatVector::GetData<float>(result);
446
- Vector predicate_vector(constant_filter.constant);
447
- auto predicate = FlatVector::GetData<float>(predicate_vector);
436
+ auto predicate = FloatValue::Get(constant_filter.constant);
448
437
  FilterSelectionSwitch<float>(result_flat, predicate, sel, approved_tuple_count,
449
438
  constant_filter.comparison_type, mask);
450
439
  break;
451
440
  }
452
441
  case PhysicalType::DOUBLE: {
453
442
  auto result_flat = FlatVector::GetData<double>(result);
454
- Vector predicate_vector(constant_filter.constant);
455
- auto predicate = FlatVector::GetData<double>(predicate_vector);
443
+ auto predicate = DoubleValue::Get(constant_filter.constant);
456
444
  FilterSelectionSwitch<double>(result_flat, predicate, sel, approved_tuple_count,
457
445
  constant_filter.comparison_type, mask);
458
446
  break;
459
447
  }
460
448
  case PhysicalType::VARCHAR: {
461
449
  auto result_flat = FlatVector::GetData<string_t>(result);
462
- Vector predicate_vector(constant_filter.constant);
463
- auto predicate = FlatVector::GetData<string_t>(predicate_vector);
450
+ auto predicate = string_t(StringValue::Get(constant_filter.constant));
464
451
  FilterSelectionSwitch<string_t>(result_flat, predicate, sel, approved_tuple_count,
465
452
  constant_filter.comparison_type, mask);
466
453
  break;
467
454
  }
468
455
  case PhysicalType::BOOL: {
469
456
  auto result_flat = FlatVector::GetData<bool>(result);
470
- Vector predicate_vector(constant_filter.constant);
471
- auto predicate = FlatVector::GetData<bool>(predicate_vector);
457
+ auto predicate = BooleanValue::Get(constant_filter.constant);
472
458
  FilterSelectionSwitch<bool>(result_flat, predicate, sel, approved_tuple_count,
473
459
  constant_filter.comparison_type, mask);
474
460
  break;
@@ -2,6 +2,8 @@
2
2
  #include "duckdb/storage/statistics/list_stats.hpp"
3
3
  #include "duckdb/transaction/transaction.hpp"
4
4
  #include "duckdb/storage/table/column_checkpoint_state.hpp"
5
+ #include "duckdb/storage/table/append_state.hpp"
6
+ #include "duckdb/storage/table/scan_state.hpp"
5
7
 
6
8
  namespace duckdb {
7
9
 
@@ -30,14 +32,11 @@ void ListColumnData::InitializeScan(ColumnScanState &state) {
30
32
  ColumnData::InitializeScan(state);
31
33
 
32
34
  // initialize the validity segment
33
- ColumnScanState validity_state;
34
- validity.InitializeScan(validity_state);
35
- state.child_states.push_back(std::move(validity_state));
35
+ D_ASSERT(state.child_states.size() == 2);
36
+ validity.InitializeScan(state.child_states[0]);
36
37
 
37
38
  // initialize the child scan
38
- ColumnScanState child_state;
39
- child_column->InitializeScan(child_state);
40
- state.child_states.push_back(std::move(child_state));
39
+ child_column->InitializeScan(state.child_states[1]);
41
40
  }
42
41
 
43
42
  uint64_t ListColumnData::FetchListOffset(idx_t row_idx) {
@@ -58,19 +57,16 @@ void ListColumnData::InitializeScanWithOffset(ColumnScanState &state, idx_t row_
58
57
  ColumnData::InitializeScanWithOffset(state, row_idx);
59
58
 
60
59
  // initialize the validity segment
61
- ColumnScanState validity_state;
62
- validity.InitializeScanWithOffset(validity_state, row_idx);
63
- state.child_states.push_back(std::move(validity_state));
60
+ D_ASSERT(state.child_states.size() == 2);
61
+ validity.InitializeScanWithOffset(state.child_states[0], row_idx);
64
62
 
65
63
  // we need to read the list at position row_idx to get the correct row offset of the child
66
64
  auto child_offset = row_idx == start ? 0 : FetchListOffset(row_idx - 1);
67
65
 
68
66
  D_ASSERT(child_offset <= child_column->GetMaxEntry());
69
- ColumnScanState child_state;
70
67
  if (child_offset < child_column->GetMaxEntry()) {
71
- child_column->InitializeScanWithOffset(child_state, start + child_offset);
68
+ child_column->InitializeScanWithOffset(state.child_states[1], start + child_offset);
72
69
  }
73
- state.child_states.push_back(std::move(child_state));
74
70
  }
75
71
 
76
72
  idx_t ListColumnData::Scan(TransactionData transaction, idx_t vector_index, ColumnScanState &state, Vector &result) {
@@ -290,6 +286,7 @@ void ListColumnData::FetchRow(TransactionData transaction, ColumnFetchState &sta
290
286
  auto &child_type = ListType::GetChildType(result.GetType());
291
287
  Vector child_scan(child_type, child_scan_count);
292
288
  // seek the scan towards the specified position and read [length] entries
289
+ child_state->Initialize(child_type);
293
290
  child_column->InitializeScanWithOffset(*child_state, start + start_offset);
294
291
  D_ASSERT(child_type.InternalType() == PhysicalType::STRUCT ||
295
292
  child_state->row_index + child_scan_count - this->start <= child_column->GetMaxEntry());