duckdb 0.7.2-dev1034.0 → 0.7.2-dev1146.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (96)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  3. package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -1
  4. package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
  5. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
  6. package/src/duckdb/src/common/serializer/enum_serializer.cpp +6 -6
  7. package/src/duckdb/src/common/sort/comparators.cpp +14 -5
  8. package/src/duckdb/src/common/types/column_data_collection_segment.cpp +1 -4
  9. package/src/duckdb/src/common/types/interval.cpp +0 -41
  10. package/src/duckdb/src/common/types/list_segment.cpp +658 -0
  11. package/src/duckdb/src/common/types/string_heap.cpp +1 -1
  12. package/src/duckdb/src/common/types/string_type.cpp +1 -1
  13. package/src/duckdb/src/common/types/validity_mask.cpp +24 -7
  14. package/src/duckdb/src/common/types/vector.cpp +3 -7
  15. package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
  16. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
  17. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
  18. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  19. package/src/duckdb/src/execution/index/art/art.cpp +13 -0
  20. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
  21. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
  22. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +1 -0
  23. package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
  24. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +5 -1
  25. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +18 -5
  26. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
  27. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
  28. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
  29. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
  30. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +0 -4
  31. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +1 -0
  32. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
  33. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
  34. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +18 -10
  35. package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
  36. package/src/duckdb/src/function/cast_rules.cpp +9 -4
  37. package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
  38. package/src/duckdb/src/function/table/read_csv.cpp +5 -0
  39. package/src/duckdb/src/function/table/table_scan.cpp +8 -11
  40. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  41. package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
  42. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
  43. package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
  44. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
  45. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
  46. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
  47. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
  48. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -12
  49. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +4 -0
  50. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  51. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -0
  52. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -0
  53. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +0 -2
  54. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  55. package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
  56. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +18 -7
  58. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -3
  59. package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
  60. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +0 -1
  61. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +35 -43
  62. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +18 -5
  63. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -4
  64. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -29
  65. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +2 -3
  66. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +11 -1
  67. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
  68. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +4 -1
  69. package/src/duckdb/src/include/duckdb.h +21 -0
  70. package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
  71. package/src/duckdb/src/main/settings/settings.cpp +20 -8
  72. package/src/duckdb/src/optimizer/filter_combiner.cpp +2 -5
  73. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -0
  74. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +1 -0
  75. package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -3
  76. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +22 -0
  77. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
  78. package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
  79. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -1
  80. package/src/duckdb/src/storage/compression/numeric_constant.cpp +1 -1
  81. package/src/duckdb/src/storage/compression/rle.cpp +1 -0
  82. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
  83. package/src/duckdb/src/storage/data_table.cpp +3 -3
  84. package/src/duckdb/src/storage/local_storage.cpp +7 -0
  85. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  86. package/src/duckdb/src/storage/table/column_data.cpp +75 -18
  87. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -1
  88. package/src/duckdb/src/storage/table/column_segment.cpp +17 -31
  89. package/src/duckdb/src/storage/table/list_column_data.cpp +9 -12
  90. package/src/duckdb/src/storage/table/row_group.cpp +200 -136
  91. package/src/duckdb/src/storage/table/row_group_collection.cpp +75 -45
  92. package/src/duckdb/src/storage/table/scan_state.cpp +31 -38
  93. package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -6
  94. package/src/duckdb/src/storage/table/struct_column_data.cpp +11 -18
  95. package/src/duckdb/src/storage/table/update_segment.cpp +3 -0
  96. package/src/duckdb/ub_src_common_types.cpp +2 -0
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/execution/radix_partitioned_hashtable.hpp"
2
2
  #include "duckdb/parallel/task_scheduler.hpp"
3
3
  #include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp"
4
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
4
5
  #include "duckdb/parallel/event.hpp"
5
6
 
6
7
  namespace duckdb {
@@ -52,10 +53,13 @@ RadixPartitionedHashTable::RadixPartitionedHashTable(GroupingSet &grouping_set_p
52
53
  // Sink
53
54
  //===--------------------------------------------------------------------===//
54
55
  class RadixHTGlobalState : public GlobalSinkState {
56
+ constexpr const static idx_t MAX_RADIX_PARTITIONS = 32;
57
+
55
58
  public:
56
59
  explicit RadixHTGlobalState(ClientContext &context)
57
- : is_empty(true), multi_scan(true), total_groups(0),
58
- partition_info((idx_t)TaskScheduler::GetScheduler(context).NumberOfThreads()) {
60
+ : is_empty(true), multi_scan(true), partitioned(false),
61
+ partition_info(
62
+ MinValue<idx_t>(MAX_RADIX_PARTITIONS, TaskScheduler::GetScheduler(context).NumberOfThreads())) {
59
63
  }
60
64
 
61
65
  vector<unique_ptr<PartitionableHashTable>> intermediate_hts;
@@ -67,8 +71,8 @@ public:
67
71
  bool multi_scan;
68
72
  //! The lock for updating the global aggregate state
69
73
  mutex lock;
70
- //! a counter to determine if we should switch over to partitioning
71
- atomic<idx_t> total_groups;
74
+ //! Whether or not any thread has crossed the partitioning threshold
75
+ atomic<bool> partitioned;
72
76
 
73
77
  bool is_finalized = false;
74
78
  bool is_partitioned = false;
@@ -78,7 +82,7 @@ public:
78
82
 
79
83
  class RadixHTLocalState : public LocalSinkState {
80
84
  public:
81
- explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : is_empty(true) {
85
+ explicit RadixHTLocalState(const RadixPartitionedHashTable &ht) : total_groups(0), is_empty(true) {
82
86
  // if there are no groups we create a fake group so everything has the same group
83
87
  group_chunk.InitializeEmpty(ht.group_types);
84
88
  if (ht.grouping_set.empty()) {
@@ -89,6 +93,8 @@ public:
89
93
  DataChunk group_chunk;
90
94
  //! The aggregate HT
91
95
  unique_ptr<PartitionableHashTable> ht;
96
+ //! The total number of groups found by this thread
97
+ idx_t total_groups;
92
98
 
93
99
  //! Whether or not any tuples were added to the HT
94
100
  bool is_empty;
@@ -145,7 +151,7 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState
145
151
  }
146
152
  D_ASSERT(gstate.finalized_hts.size() == 1);
147
153
  D_ASSERT(gstate.finalized_hts[0]);
148
- gstate.total_groups += gstate.finalized_hts[0]->AddChunk(group_chunk, payload_input, filter);
154
+ llstate.total_groups += gstate.finalized_hts[0]->AddChunk(group_chunk, payload_input, filter);
149
155
  return;
150
156
  }
151
157
 
@@ -159,9 +165,11 @@ void RadixPartitionedHashTable::Sink(ExecutionContext &context, GlobalSinkState
159
165
  group_types, op.payload_types, op.bindings);
160
166
  }
161
167
 
162
- gstate.total_groups +=
163
- llstate.ht->AddChunk(group_chunk, payload_input,
164
- gstate.total_groups > radix_limit && gstate.partition_info.n_partitions > 1, filter);
168
+ llstate.total_groups += llstate.ht->AddChunk(group_chunk, payload_input,
169
+ gstate.partitioned && gstate.partition_info.n_partitions > 1, filter);
170
+ if (llstate.total_groups >= radix_limit) {
171
+ gstate.partitioned = true;
172
+ }
165
173
  }
166
174
 
167
175
  void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkState &state,
@@ -182,7 +190,7 @@ void RadixPartitionedHashTable::Combine(ExecutionContext &context, GlobalSinkSta
182
190
  return; // no data
183
191
  }
184
192
 
185
- if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.total_groups > radix_limit) {
193
+ if (!llstate.ht->IsPartitioned() && gstate.partition_info.n_partitions > 1 && gstate.partitioned) {
186
194
  llstate.ht->Partition();
187
195
  }
188
196