duckdb 0.7.2-dev16.0 → 0.7.2-dev225.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-extension.cpp +2 -0
  4. package/src/duckdb/extension/icu/icu-table-range.cpp +194 -0
  5. package/src/duckdb/extension/icu/include/icu-table-range.hpp +17 -0
  6. package/src/duckdb/extension/parquet/column_writer.cpp +0 -1
  7. package/src/duckdb/extension/parquet/parquet-extension.cpp +11 -2
  8. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +4 -0
  9. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +7 -6
  10. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +20 -1
  11. package/src/duckdb/src/common/enums/statement_type.cpp +2 -0
  12. package/src/duckdb/src/common/types/bit.cpp +95 -58
  13. package/src/duckdb/src/common/types/value.cpp +149 -53
  14. package/src/duckdb/src/common/types/vector.cpp +13 -10
  15. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +1 -1
  16. package/src/duckdb/src/function/aggregate/algebraic/avg.cpp +0 -6
  17. package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +99 -95
  18. package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +261 -0
  19. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +0 -3
  20. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +1 -0
  21. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +16 -5
  22. package/src/duckdb/src/function/cast/bit_cast.cpp +0 -2
  23. package/src/duckdb/src/function/cast/blob_cast.cpp +0 -1
  24. package/src/duckdb/src/function/scalar/bit/bitstring.cpp +99 -0
  25. package/src/duckdb/src/function/scalar/map/map_entries.cpp +61 -0
  26. package/src/duckdb/src/function/scalar/map/map_keys_values.cpp +97 -0
  27. package/src/duckdb/src/function/scalar/nested_functions.cpp +3 -0
  28. package/src/duckdb/src/function/scalar/operators/add.cpp +0 -9
  29. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +2 -14
  30. package/src/duckdb/src/function/scalar/operators/bitwise.cpp +0 -63
  31. package/src/duckdb/src/function/scalar/operators/multiply.cpp +0 -6
  32. package/src/duckdb/src/function/scalar/operators/subtract.cpp +0 -6
  33. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  34. package/src/duckdb/src/function/table/read_csv.cpp +9 -0
  35. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  36. package/src/duckdb/src/function/table_function.cpp +19 -0
  37. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +6 -8
  38. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -19
  39. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +2 -1
  40. package/src/duckdb/src/include/duckdb/common/enums/tableref_type.hpp +2 -1
  41. package/src/duckdb/src/include/duckdb/common/types/bit.hpp +5 -1
  42. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -8
  43. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -2
  44. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +5 -0
  45. package/src/duckdb/src/include/duckdb/function/scalar/bit_functions.hpp +4 -0
  46. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +12 -0
  47. package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
  48. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +2 -0
  49. package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
  50. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -0
  51. package/src/duckdb/src/include/duckdb/optimizer/join_order/cardinality_estimator.hpp +2 -2
  52. package/src/duckdb/src/include/duckdb/parser/common_table_expression_info.hpp +2 -0
  53. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +2 -1
  54. package/src/duckdb/src/include/duckdb/parser/parsed_data/{alter_function_info.hpp → alter_scalar_function_info.hpp} +13 -13
  55. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_function_info.hpp +47 -0
  56. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_table_function_info.hpp +2 -1
  57. package/src/duckdb/src/include/duckdb/parser/query_node.hpp +2 -1
  58. package/src/duckdb/src/include/duckdb/parser/statement/multi_statement.hpp +28 -0
  59. package/src/duckdb/src/include/duckdb/parser/tableref/list.hpp +1 -0
  60. package/src/duckdb/src/include/duckdb/parser/tableref/pivotref.hpp +76 -0
  61. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +2 -0
  62. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +28 -0
  63. package/src/duckdb/src/include/duckdb/planner/binder.hpp +8 -0
  64. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +2 -0
  65. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +76 -44
  66. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +2 -0
  67. package/src/duckdb/src/include/duckdb/storage/statistics/node_statistics.hpp +26 -0
  68. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  69. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +2 -0
  70. package/src/duckdb/src/include/duckdb.h +49 -1
  71. package/src/duckdb/src/include/duckdb.hpp +0 -1
  72. package/src/duckdb/src/main/capi/pending-c.cpp +16 -3
  73. package/src/duckdb/src/main/capi/result-c.cpp +27 -1
  74. package/src/duckdb/src/main/capi/stream-c.cpp +25 -0
  75. package/src/duckdb/src/main/client_context.cpp +8 -1
  76. package/src/duckdb/src/main/database.cpp +10 -2
  77. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +98 -66
  78. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +16 -3
  79. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +7 -3
  80. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +56 -0
  81. package/src/duckdb/src/parser/parsed_data/alter_table_function_info.cpp +51 -0
  82. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  83. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +6 -0
  84. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +8 -0
  85. package/src/duckdb/src/parser/query_node.cpp +1 -1
  86. package/src/duckdb/src/parser/statement/multi_statement.cpp +18 -0
  87. package/src/duckdb/src/parser/tableref/pivotref.cpp +296 -0
  88. package/src/duckdb/src/parser/tableref.cpp +3 -0
  89. package/src/duckdb/src/parser/transform/helpers/transform_alias.cpp +12 -6
  90. package/src/duckdb/src/parser/transform/helpers/transform_cte.cpp +24 -0
  91. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +4 -0
  92. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +4 -0
  93. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +150 -0
  94. package/src/duckdb/src/parser/transform/statement/transform_select.cpp +8 -0
  95. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +1 -1
  96. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +105 -0
  97. package/src/duckdb/src/parser/transform/tableref/transform_tableref.cpp +2 -0
  98. package/src/duckdb/src/parser/transformer.cpp +15 -3
  99. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +11 -3
  100. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +1 -1
  101. package/src/duckdb/src/planner/binder/statement/bind_logical_plan.cpp +17 -0
  102. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +365 -0
  103. package/src/duckdb/src/planner/binder.cpp +5 -0
  104. package/src/duckdb/src/planner/pragma_handler.cpp +10 -2
  105. package/src/duckdb/src/storage/buffer_manager.cpp +44 -46
  106. package/src/duckdb/src/storage/compression/bitpacking.cpp +25 -21
  107. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +41 -43
  108. package/src/duckdb/src/storage/compression/rle.cpp +17 -13
  109. package/src/duckdb/src/storage/statistics/base_statistics.cpp +3 -3
  110. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  111. package/src/duckdb/src/storage/table/column_data.cpp +5 -2
  112. package/src/duckdb/src/storage/table/list_column_data.cpp +32 -47
  113. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +3 -0
  114. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +34 -1
  115. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +1016 -530
  116. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +5 -0
  117. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +22697 -21987
  118. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +2 -0
  119. package/src/duckdb/ub_src_function_scalar_bit.cpp +2 -0
  120. package/src/duckdb/ub_src_function_scalar_map.cpp +4 -0
  121. package/src/duckdb/ub_src_main_capi.cpp +2 -0
  122. package/src/duckdb/ub_src_parser_parsed_data.cpp +4 -2
  123. package/src/duckdb/ub_src_parser_statement.cpp +2 -0
  124. package/src/duckdb/ub_src_parser_tableref.cpp +2 -0
  125. package/src/duckdb/ub_src_parser_transform_statement.cpp +2 -0
  126. package/src/duckdb/ub_src_parser_transform_tableref.cpp +2 -0
  127. package/src/duckdb/ub_src_planner_binder_tableref.cpp +2 -0
  128. package/src/duckdb/src/include/duckdb/main/loadable_extension.hpp +0 -59
  129. package/src/duckdb/src/parser/parsed_data/alter_function_info.cpp +0 -55
@@ -3,6 +3,8 @@
3
3
  #include "duckdb/common/types/null_value.hpp"
4
4
  #include "duckdb/common/vector_operations/vector_operations.hpp"
5
5
  #include "duckdb/common/vector_operations/aggregate_executor.hpp"
6
+ #include "duckdb/common/types/bit.hpp"
7
+ #include "duckdb/common/types/cast_helpers.hpp"
6
8
 
7
9
  namespace duckdb {
8
10
 
@@ -38,37 +40,32 @@ static AggregateFunction GetBitfieldUnaryAggregate(LogicalType type) {
38
40
  }
39
41
  }
40
42
 
41
- struct BitAndOperation {
43
+ struct BitwiseOperation {
42
44
  template <class STATE>
43
45
  static void Initialize(STATE *state) {
44
- // If there are no matching rows, BIT_AND() returns a null value.
46
+ // If there are no matching rows, returns a null value.
45
47
  state->is_set = false;
46
48
  }
47
49
 
48
50
  template <class INPUT_TYPE, class STATE, class OP>
49
51
  static void Operation(STATE *state, AggregateInputData &, INPUT_TYPE *input, ValidityMask &mask, idx_t idx) {
50
52
  if (!state->is_set) {
53
+ OP::template Assign(state, input[idx]);
51
54
  state->is_set = true;
52
- state->value = input[idx];
53
55
  } else {
54
- state->value &= input[idx];
56
+ OP::template Execute(state, input[idx]);
55
57
  }
56
58
  }
57
59
 
58
60
  template <class INPUT_TYPE, class STATE, class OP>
59
61
  static void ConstantOperation(STATE *state, AggregateInputData &aggr_input_data, INPUT_TYPE *input,
60
62
  ValidityMask &mask, idx_t count) {
61
- // count is not relevant
62
- Operation<INPUT_TYPE, STATE, OP>(state, aggr_input_data, input, mask, 0);
63
+ OP::template Operation<INPUT_TYPE, STATE, OP>(state, aggr_input_data, input, mask, 0);
63
64
  }
64
65
 
65
- template <class T, class STATE>
66
- static void Finalize(Vector &result, AggregateInputData &, STATE *state, T *target, ValidityMask &mask, idx_t idx) {
67
- if (!state->is_set) {
68
- mask.SetInvalid(idx);
69
- } else {
70
- target[idx] = state->value;
71
- }
66
+ template <class INPUT_TYPE, class STATE>
67
+ static void Assign(STATE *state, INPUT_TYPE input) {
68
+ state->value = input;
72
69
  }
73
70
 
74
71
  template <class STATE, class OP>
@@ -79,9 +76,19 @@ struct BitAndOperation {
79
76
  }
80
77
  if (!target->is_set) {
81
78
  // target is NULL, use source value directly.
82
- *target = source;
79
+ OP::template Assign(target, source.value);
80
+ target->is_set = true;
81
+ } else {
82
+ OP::template Execute(target, source.value);
83
+ }
84
+ }
85
+
86
+ template <class T, class STATE>
87
+ static void Finalize(Vector &result, AggregateInputData &, STATE *state, T *target, ValidityMask &mask, idx_t idx) {
88
+ if (!state->is_set) {
89
+ mask.SetInvalid(idx);
83
90
  } else {
84
- target->value &= source.value;
91
+ target[idx] = state->value;
85
92
  }
86
93
  }
87
94
 
@@ -90,36 +97,55 @@ struct BitAndOperation {
90
97
  }
91
98
  };
92
99
 
93
- void BitAndFun::RegisterFunction(BuiltinFunctions &set) {
94
- AggregateFunctionSet bit_and("bit_and");
95
- for (auto &type : LogicalType::Integral()) {
96
- bit_and.AddFunction(GetBitfieldUnaryAggregate<BitAndOperation>(type));
100
+ struct BitAndOperation : public BitwiseOperation {
101
+ template <class INPUT_TYPE, class STATE>
102
+ static void Execute(STATE *state, INPUT_TYPE input) {
103
+ state->value &= input;
97
104
  }
98
- set.AddFunction(bit_and);
99
- }
105
+ };
100
106
 
101
- struct BitOrOperation {
102
- template <class STATE>
103
- static void Initialize(STATE *state) {
104
- // If there are no matching rows, BIT_OR() returns a null value.
105
- state->is_set = false;
107
+ struct BitOrOperation : public BitwiseOperation {
108
+ template <class INPUT_TYPE, class STATE>
109
+ static void Execute(STATE *state, INPUT_TYPE input) {
110
+ state->value |= input;
106
111
  }
112
+ };
107
113
 
108
- template <class INPUT_TYPE, class STATE, class OP>
109
- static void Operation(STATE *state, AggregateInputData &, INPUT_TYPE *input, ValidityMask &mask, idx_t idx) {
110
- if (!state->is_set) {
111
- state->is_set = true;
112
- state->value = input[idx];
113
- } else {
114
- state->value |= input[idx];
115
- }
114
+ struct BitXorOperation : public BitwiseOperation {
115
+ template <class INPUT_TYPE, class STATE>
116
+ static void Execute(STATE *state, INPUT_TYPE input) {
117
+ state->value ^= input;
116
118
  }
117
119
 
118
120
  template <class INPUT_TYPE, class STATE, class OP>
119
121
  static void ConstantOperation(STATE *state, AggregateInputData &aggr_input_data, INPUT_TYPE *input,
120
122
  ValidityMask &mask, idx_t count) {
121
- // count is irrelevant
122
- Operation<INPUT_TYPE, STATE, OP>(state, aggr_input_data, input, mask, 0);
123
+ for (idx_t i = 0; i < count; i++) {
124
+ Operation<INPUT_TYPE, STATE, OP>(state, aggr_input_data, input, mask, 0);
125
+ }
126
+ }
127
+ };
128
+
129
+ struct BitStringBitwiseOperation : public BitwiseOperation {
130
+ template <class STATE>
131
+ static void Destroy(STATE *state) {
132
+ if (state->is_set && !state->value.IsInlined()) {
133
+ delete[] state->value.GetDataUnsafe();
134
+ }
135
+ }
136
+
137
+ template <class INPUT_TYPE, class STATE>
138
+ static void Assign(STATE *state, INPUT_TYPE input) {
139
+ D_ASSERT(state->is_set == false);
140
+ if (input.IsInlined()) {
141
+ state->value = input;
142
+ } else { // non-inlined string, need to allocate space for it
143
+ auto len = input.GetSize();
144
+ auto ptr = new char[len];
145
+ memcpy(ptr, input.GetDataUnsafe(), len);
146
+
147
+ state->value = string_t(ptr, len);
148
+ }
123
149
  }
124
150
 
125
151
  template <class T, class STATE>
@@ -127,52 +153,31 @@ struct BitOrOperation {
127
153
  if (!state->is_set) {
128
154
  mask.SetInvalid(idx);
129
155
  } else {
130
- target[idx] = state->value;
156
+ target[idx] = StringVector::AddStringOrBlob(result, state->value);
131
157
  }
132
158
  }
159
+ };
133
160
 
134
- template <class STATE, class OP>
135
- static void Combine(const STATE &source, STATE *target, AggregateInputData &) {
136
- if (!source.is_set) {
137
- // source is NULL, nothing to do.
138
- return;
139
- }
140
- if (!target->is_set) {
141
- // target is NULL, use source value directly.
142
- *target = source;
143
- } else {
144
- target->value |= source.value;
145
- }
146
- }
161
+ struct BitStringAndOperation : public BitStringBitwiseOperation {
147
162
 
148
- static bool IgnoreNull() {
149
- return true;
163
+ template <class INPUT_TYPE, class STATE>
164
+ static void Execute(STATE *state, INPUT_TYPE input) {
165
+ Bit::BitwiseAnd(input, state->value, state->value);
150
166
  }
151
167
  };
152
168
 
153
- void BitOrFun::RegisterFunction(BuiltinFunctions &set) {
154
- AggregateFunctionSet bit_or("bit_or");
155
- for (auto &type : LogicalType::Integral()) {
156
- bit_or.AddFunction(GetBitfieldUnaryAggregate<BitOrOperation>(type));
157
- }
158
- set.AddFunction(bit_or);
159
- }
169
+ struct BitStringOrOperation : public BitStringBitwiseOperation {
160
170
 
161
- struct BitXorOperation {
162
- template <class STATE>
163
- static void Initialize(STATE *state) {
164
- // If there are no matching rows, BIT_XOR() returns a null value.
165
- state->is_set = false;
171
+ template <class INPUT_TYPE, class STATE>
172
+ static void Execute(STATE *state, INPUT_TYPE input) {
173
+ Bit::BitwiseOr(input, state->value, state->value);
166
174
  }
175
+ };
167
176
 
168
- template <class INPUT_TYPE, class STATE, class OP>
169
- static void Operation(STATE *state, AggregateInputData &, INPUT_TYPE *input, ValidityMask &mask, idx_t idx) {
170
- if (!state->is_set) {
171
- state->is_set = true;
172
- state->value = input[idx];
173
- } else {
174
- state->value ^= input[idx];
175
- }
177
+ struct BitStringXorOperation : public BitStringBitwiseOperation {
178
+ template <class INPUT_TYPE, class STATE>
179
+ static void Execute(STATE *state, INPUT_TYPE input) {
180
+ Bit::BitwiseXor(input, state->value, state->value);
176
181
  }
177
182
 
178
183
  template <class INPUT_TYPE, class STATE, class OP>
@@ -182,40 +187,39 @@ struct BitXorOperation {
182
187
  Operation<INPUT_TYPE, STATE, OP>(state, aggr_input_data, input, mask, 0);
183
188
  }
184
189
  }
190
+ };
185
191
 
186
- template <class T, class STATE>
187
- static void Finalize(Vector &result, AggregateInputData &, STATE *state, T *target, ValidityMask &mask, idx_t idx) {
188
- if (!state->is_set) {
189
- mask.SetInvalid(idx);
190
- } else {
191
- target[idx] = state->value;
192
- }
192
+ void BitAndFun::RegisterFunction(BuiltinFunctions &set) {
193
+ AggregateFunctionSet bit_and("bit_and");
194
+ for (auto &type : LogicalType::Integral()) {
195
+ bit_and.AddFunction(GetBitfieldUnaryAggregate<BitAndOperation>(type));
193
196
  }
194
197
 
195
- template <class STATE, class OP>
196
- static void Combine(const STATE &source, STATE *target, AggregateInputData &) {
197
- if (!source.is_set) {
198
- // source is NULL, nothing to do.
199
- return;
200
- }
201
- if (!target->is_set) {
202
- // target is NULL, use source value directly.
203
- *target = source;
204
- } else {
205
- target->value ^= source.value;
206
- }
207
- }
198
+ bit_and.AddFunction(
199
+ AggregateFunction::UnaryAggregateDestructor<BitState<string_t>, string_t, string_t, BitStringAndOperation>(
200
+ LogicalType::BIT, LogicalType::BIT));
201
+ set.AddFunction(bit_and);
202
+ }
208
203
 
209
- static bool IgnoreNull() {
210
- return true;
204
+ void BitOrFun::RegisterFunction(BuiltinFunctions &set) {
205
+ AggregateFunctionSet bit_or("bit_or");
206
+ for (auto &type : LogicalType::Integral()) {
207
+ bit_or.AddFunction(GetBitfieldUnaryAggregate<BitOrOperation>(type));
211
208
  }
212
- };
209
+ bit_or.AddFunction(
210
+ AggregateFunction::UnaryAggregateDestructor<BitState<string_t>, string_t, string_t, BitStringOrOperation>(
211
+ LogicalType::BIT, LogicalType::BIT));
212
+ set.AddFunction(bit_or);
213
+ }
213
214
 
214
215
  void BitXorFun::RegisterFunction(BuiltinFunctions &set) {
215
216
  AggregateFunctionSet bit_xor("bit_xor");
216
217
  for (auto &type : LogicalType::Integral()) {
217
218
  bit_xor.AddFunction(GetBitfieldUnaryAggregate<BitXorOperation>(type));
218
219
  }
220
+ bit_xor.AddFunction(
221
+ AggregateFunction::UnaryAggregateDestructor<BitState<string_t>, string_t, string_t, BitStringXorOperation>(
222
+ LogicalType::BIT, LogicalType::BIT));
219
223
  set.AddFunction(bit_xor);
220
224
  }
221
225
 
@@ -0,0 +1,261 @@
1
+ #include "duckdb/function/aggregate/distributive_functions.hpp"
2
+ #include "duckdb/common/exception.hpp"
3
+ #include "duckdb/common/types/null_value.hpp"
4
+ #include "duckdb/common/vector_operations/aggregate_executor.hpp"
5
+ #include "duckdb/common/types/bit.hpp"
6
+ #include "duckdb/storage/statistics/numeric_statistics.hpp"
7
+ #include "duckdb/execution/expression_executor.hpp"
8
+ #include "duckdb/common/types/cast_helpers.hpp"
9
+
10
+ namespace duckdb {
11
+
12
+ template <class INPUT_TYPE>
13
+ struct BitAggState {
14
+ bool is_set;
15
+ string_t value;
16
+ INPUT_TYPE min;
17
+ INPUT_TYPE max;
18
+ };
19
+
20
+ struct BitstringAggBindData : public FunctionData {
21
+ Value min;
22
+ Value max;
23
+
24
+ BitstringAggBindData() {
25
+ }
26
+
27
+ BitstringAggBindData(Value min, Value max) : min(std::move(min)), max(std::move(max)) {
28
+ }
29
+
30
+ unique_ptr<FunctionData> Copy() const override {
31
+ return make_unique<BitstringAggBindData>(*this);
32
+ }
33
+
34
+ bool Equals(const FunctionData &other_p) const override {
35
+ auto &other = (BitstringAggBindData &)other_p;
36
+ if (min.IsNull() && other.min.IsNull() && max.IsNull() && other.max.IsNull()) {
37
+ return true;
38
+ }
39
+ if (Value::NotDistinctFrom(min, other.min) && Value::NotDistinctFrom(max, other.max)) {
40
+ return true;
41
+ }
42
+ return false;
43
+ }
44
+ };
45
+
46
+ struct BitStringAggOperation {
47
+ static constexpr const idx_t MAX_BIT_RANGE = 1000000000; // for now capped at 1 billion bits
48
+
49
+ template <class STATE>
50
+ static void Initialize(STATE *state) {
51
+ state->is_set = false;
52
+ }
53
+
54
+ template <class INPUT_TYPE, class STATE, class OP>
55
+ static void Operation(STATE *state, AggregateInputData &data, INPUT_TYPE *input, ValidityMask &mask, idx_t idx) {
56
+ auto bind_agg_data = (BitstringAggBindData *)data.bind_data;
57
+ if (!state->is_set) {
58
+ if (bind_agg_data->min.IsNull() || bind_agg_data->max.IsNull()) {
59
+ throw BinderException(
60
+ "Could not retrieve required statistics. Alternatively, try by providing the statistics "
61
+ "explicitly: BITSTRING_AGG(col, min, max) ");
62
+ }
63
+ state->min = bind_agg_data->min.GetValue<INPUT_TYPE>();
64
+ state->max = bind_agg_data->max.GetValue<INPUT_TYPE>();
65
+ idx_t bit_range =
66
+ GetRange(bind_agg_data->min.GetValue<INPUT_TYPE>(), bind_agg_data->max.GetValue<INPUT_TYPE>());
67
+ if (bit_range > MAX_BIT_RANGE) {
68
+ throw OutOfRangeException(
69
+ "The range between min and max value (%s <-> %s) is too large for bitstring aggregation",
70
+ NumericHelper::ToString(state->min), NumericHelper::ToString(state->max));
71
+ }
72
+ idx_t len = Bit::ComputeBitstringLen(bit_range);
73
+ auto target = len > string_t::INLINE_LENGTH ? string_t(new char[len], len) : string_t(len);
74
+ Bit::SetEmptyBitString(target, bit_range);
75
+
76
+ state->value = target;
77
+ state->is_set = true;
78
+ }
79
+ if (input[idx] >= state->min && input[idx] <= state->max) {
80
+ Execute(state, input[idx], bind_agg_data->min.GetValue<INPUT_TYPE>());
81
+ } else {
82
+ throw OutOfRangeException("Value %s is outside of provided min and max range (%s <-> %s)",
83
+ NumericHelper::ToString(input[idx]), NumericHelper::ToString(state->min),
84
+ NumericHelper::ToString(state->max));
85
+ }
86
+ }
87
+
88
+ template <class INPUT_TYPE, class STATE, class OP>
89
+ static void ConstantOperation(STATE *state, AggregateInputData &aggr_input_data, INPUT_TYPE *input,
90
+ ValidityMask &mask, idx_t count) {
91
+ OP::template Operation<INPUT_TYPE, STATE, OP>(state, aggr_input_data, input, mask, 0);
92
+ }
93
+
94
+ template <class INPUT_TYPE>
95
+ static idx_t GetRange(INPUT_TYPE min, INPUT_TYPE max) {
96
+ return max - min + 1;
97
+ }
98
+
99
+ template <class INPUT_TYPE, class STATE>
100
+ static void Execute(STATE *state, INPUT_TYPE input, INPUT_TYPE min) {
101
+ Bit::SetBit(state->value, input - min, 1);
102
+ }
103
+
104
+ template <class STATE, class OP>
105
+ static void Combine(const STATE &source, STATE *target, AggregateInputData &) {
106
+ if (!source.is_set) {
107
+ return;
108
+ }
109
+ if (!target->is_set) {
110
+ Assign(target, source.value);
111
+ target->is_set = true;
112
+ target->min = source.min;
113
+ target->max = source.max;
114
+ } else {
115
+ Bit::BitwiseOr(source.value, target->value, target->value);
116
+ }
117
+ }
118
+
119
+ template <class INPUT_TYPE, class STATE>
120
+ static void Assign(STATE *state, INPUT_TYPE input) {
121
+ D_ASSERT(state->is_set == false);
122
+ if (input.IsInlined()) {
123
+ state->value = input;
124
+ } else { // non-inlined string, need to allocate space for it
125
+ auto len = input.GetSize();
126
+ auto ptr = new char[len];
127
+ memcpy(ptr, input.GetDataUnsafe(), len);
128
+ state->value = string_t(ptr, len);
129
+ }
130
+ }
131
+
132
+ template <class T, class STATE>
133
+ static void Finalize(Vector &result, AggregateInputData &, STATE *state, T *target, ValidityMask &mask, idx_t idx) {
134
+ if (!state->is_set) {
135
+ mask.SetInvalid(idx);
136
+ } else {
137
+ target[idx] = StringVector::AddStringOrBlob(result, state->value);
138
+ }
139
+ }
140
+
141
+ template <class STATE>
142
+ static void Destroy(STATE *state) {
143
+ if (state->is_set && !state->value.IsInlined()) {
144
+ delete[] state->value.GetDataUnsafe();
145
+ }
146
+ }
147
+
148
+ static bool IgnoreNull() {
149
+ return true;
150
+ }
151
+ };
152
+
153
+ template <>
154
+ void BitStringAggOperation::Execute(BitAggState<hugeint_t> *state, hugeint_t input, hugeint_t min) {
155
+ idx_t val;
156
+ if (Hugeint::TryCast(input - min, val)) {
157
+ Bit::SetBit(state->value, val, 1);
158
+ } else {
159
+ throw OutOfRangeException("Range too large for bitstring aggregation");
160
+ }
161
+ }
162
+
163
+ template <>
164
+ idx_t BitStringAggOperation::GetRange(hugeint_t min, hugeint_t max) {
165
+ idx_t val;
166
+ if (Hugeint::TryCast(max - min + 1, val)) {
167
+ return val;
168
+ } else {
169
+ throw OutOfRangeException("Range too large for bitstring aggregation");
170
+ }
171
+ }
172
+
173
+ unique_ptr<BaseStatistics> BitstringPropagateStats(ClientContext &context, BoundAggregateExpression &expr,
174
+ FunctionData *bind_data,
175
+ vector<unique_ptr<BaseStatistics>> &child_stats,
176
+ NodeStatistics *node_stats) {
177
+
178
+ if (child_stats[0]) {
179
+ auto &numeric_stats = (NumericStatistics &)*child_stats[0];
180
+ if (numeric_stats.min.IsNull() || numeric_stats.max.IsNull()) {
181
+ return nullptr;
182
+ }
183
+ auto bind_agg_data = (BitstringAggBindData *)bind_data;
184
+ bind_agg_data->min = numeric_stats.min;
185
+ bind_agg_data->max = numeric_stats.max;
186
+ } else {
187
+ throw BinderException("Could not retrieve required statistics. Alternatively, try by providing the statistics "
188
+ "explicitly: BITSTRING_AGG(col, min, max) ");
189
+ }
190
+ return nullptr;
191
+ }
192
+
193
+ unique_ptr<FunctionData> BindBitstringAgg(ClientContext &context, AggregateFunction &function,
194
+ vector<unique_ptr<Expression>> &arguments) {
195
+
196
+ if (arguments.size() == 3) {
197
+ auto min = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
198
+ auto max = ExpressionExecutor::EvaluateScalar(context, *arguments[2]);
199
+ Function::EraseArgument(function, arguments, 2);
200
+ Function::EraseArgument(function, arguments, 1);
201
+ return make_unique<BitstringAggBindData>(min, max);
202
+ }
203
+ return make_unique<BitstringAggBindData>();
204
+ }
205
+
206
+ template <class TYPE>
207
+ static void BindBitString(AggregateFunctionSet &bitstring_agg, const LogicalTypeId &type) {
208
+ auto function =
209
+ AggregateFunction::UnaryAggregateDestructor<BitAggState<TYPE>, TYPE, string_t, BitStringAggOperation>(
210
+ type, LogicalType::BIT);
211
+ function.bind = BindBitstringAgg; // create new a 'BitstringAggBindData'
212
+ function.statistics = BitstringPropagateStats; // stores min and max from column stats in BitstringAggBindData
213
+ bitstring_agg.AddFunction(function); // uses the BitstringAggBindData to access statistics for creating bitstring
214
+ function.arguments = {type, type, type};
215
+ function.statistics = nullptr; // min and max are provided as arguments
216
+ bitstring_agg.AddFunction(function);
217
+ }
218
+
219
+ void BitStringAggFun::GetBitStringAggregate(const LogicalType &type, AggregateFunctionSet &bitstring_agg) {
220
+ switch (type.id()) {
221
+ case LogicalType::TINYINT: {
222
+ return BindBitString<int8_t>(bitstring_agg, type.id());
223
+ }
224
+ case LogicalType::SMALLINT: {
225
+ return BindBitString<int16_t>(bitstring_agg, type.id());
226
+ }
227
+ case LogicalType::INTEGER: {
228
+ return BindBitString<int32_t>(bitstring_agg, type.id());
229
+ }
230
+ case LogicalType::BIGINT: {
231
+ return BindBitString<int64_t>(bitstring_agg, type.id());
232
+ }
233
+ case LogicalType::HUGEINT: {
234
+ return BindBitString<hugeint_t>(bitstring_agg, type.id());
235
+ }
236
+ case LogicalType::UTINYINT: {
237
+ return BindBitString<uint8_t>(bitstring_agg, type.id());
238
+ }
239
+ case LogicalType::USMALLINT: {
240
+ return BindBitString<uint16_t>(bitstring_agg, type.id());
241
+ }
242
+ case LogicalType::UINTEGER: {
243
+ return BindBitString<uint32_t>(bitstring_agg, type.id());
244
+ }
245
+ case LogicalType::UBIGINT: {
246
+ return BindBitString<uint64_t>(bitstring_agg, type.id());
247
+ }
248
+ default:
249
+ throw InternalException("Unimplemented bitstring aggregate");
250
+ }
251
+ }
252
+
253
+ void BitStringAggFun::RegisterFunction(BuiltinFunctions &set) {
254
+ AggregateFunctionSet bitstring_agg("bitstring_agg");
255
+ for (auto &type : LogicalType::Integral()) {
256
+ GetBitStringAggregate(type, bitstring_agg);
257
+ }
258
+ set.AddFunction(bitstring_agg);
259
+ }
260
+
261
+ } // namespace duckdb
@@ -52,9 +52,6 @@ struct DoubleSumOperation : public BaseSumOperation<SumSetOperation, ADD_OPERATO
52
52
  if (!state->isset) {
53
53
  mask.SetInvalid(idx);
54
54
  } else {
55
- if (!Value::DoubleIsFinite(state->value)) {
56
- throw OutOfRangeException("SUM is out of range!");
57
- }
58
55
  target[idx] = state->value;
59
56
  }
60
57
  }
@@ -10,6 +10,7 @@ void BuiltinFunctions::RegisterDistributiveAggregates() {
10
10
  Register<BitAndFun>();
11
11
  Register<BitOrFun>();
12
12
  Register<BitXorFun>();
13
+ Register<BitStringAggFun>();
13
14
  Register<CountStarFun>();
14
15
  Register<CountFun>();
15
16
  Register<FirstFun>();
@@ -71,17 +71,25 @@ struct SortedAggregateBindData : public FunctionData {
71
71
  };
72
72
 
73
73
  struct SortedAggregateState {
74
- static const idx_t BUFFER_CAPACITY = STANDARD_VECTOR_SIZE;
74
+ //! Default buffer size, optimised for small group to avoid blowing out memory.
75
+ static const idx_t BUFFER_CAPACITY = 16;
75
76
 
76
77
  SortedAggregateState() : nsel(0) {
77
78
  }
78
79
 
79
80
  static inline void InitializeBuffer(DataChunk &chunk, const vector<LogicalType> &types) {
80
81
  if (!chunk.ColumnCount() && !types.empty()) {
81
- chunk.Initialize(Allocator::DefaultAllocator(), types);
82
+ chunk.Initialize(Allocator::DefaultAllocator(), types, BUFFER_CAPACITY);
82
83
  }
83
84
  }
84
85
 
86
+ //! Make sure the buffer is large enough for slicing
87
+ static inline void ResetBuffer(DataChunk &chunk, const vector<LogicalType> &types) {
88
+ chunk.Reset();
89
+ chunk.Destroy();
90
+ chunk.Initialize(Allocator::DefaultAllocator(), types);
91
+ }
92
+
85
93
  void Flush(SortedAggregateBindData &order_bind) {
86
94
  if (ordering) {
87
95
  return;
@@ -90,10 +98,12 @@ struct SortedAggregateState {
90
98
  ordering = make_unique<ColumnDataCollection>(order_bind.buffer_manager, order_bind.sort_types);
91
99
  InitializeBuffer(sort_buffer, order_bind.sort_types);
92
100
  ordering->Append(sort_buffer);
101
+ ResetBuffer(sort_buffer, order_bind.sort_types);
93
102
 
94
103
  arguments = make_unique<ColumnDataCollection>(order_bind.buffer_manager, order_bind.arg_types);
95
104
  InitializeBuffer(arg_buffer, order_bind.arg_types);
96
105
  arguments->Append(arg_buffer);
106
+ ResetBuffer(arg_buffer, order_bind.arg_types);
97
107
  }
98
108
 
99
109
  void Update(SortedAggregateBindData &order_bind, DataChunk &sort_chunk, DataChunk &arg_chunk) {
@@ -101,7 +111,7 @@ struct SortedAggregateState {
101
111
  InitializeBuffer(sort_buffer, order_bind.sort_types);
102
112
  InitializeBuffer(arg_buffer, order_bind.arg_types);
103
113
 
104
- if (sort_chunk.size() + sort_buffer.size() > BUFFER_CAPACITY) {
114
+ if (sort_chunk.size() + sort_buffer.size() > STANDARD_VECTOR_SIZE) {
105
115
  Flush(order_bind);
106
116
  }
107
117
  if (ordering) {
@@ -118,7 +128,7 @@ struct SortedAggregateState {
118
128
  InitializeBuffer(sort_buffer, order_bind.sort_types);
119
129
  InitializeBuffer(arg_buffer, order_bind.arg_types);
120
130
 
121
- if (nsel + sort_buffer.size() > BUFFER_CAPACITY) {
131
+ if (nsel + sort_buffer.size() > STANDARD_VECTOR_SIZE) {
122
132
  Flush(order_bind);
123
133
  }
124
134
  if (ordering) {
@@ -276,6 +286,8 @@ struct SortedAggregateFunction {
276
286
  auto &orders = order_bind->orders;
277
287
  RowLayout payload_layout;
278
288
  payload_layout.Initialize(order_bind->arg_types);
289
+ DataChunk chunk;
290
+ chunk.Initialize(Allocator::DefaultAllocator(), order_bind->arg_types);
279
291
 
280
292
  // Reusable inner state
281
293
  vector<data_t> agg_state(order_bind->function.state_size());
@@ -314,7 +326,6 @@ struct SortedAggregateFunction {
314
326
  global_sort->CompleteMergeRound(false);
315
327
  }
316
328
 
317
- auto &chunk = state->arg_buffer;
318
329
  PayloadScanner scanner(*global_sort);
319
330
  for (;;) {
320
331
  chunk.Reset();
@@ -9,8 +9,6 @@ BoundCastInfo DefaultCasts::BitCastSwitch(BindCastInput &input, const LogicalTyp
9
9
  case LogicalTypeId::VARCHAR:
10
10
  // bit to varchar
11
11
  return BoundCastInfo(&VectorCastHelpers::StringCast<string_t, duckdb::CastFromBit>);
12
- case LogicalTypeId::BLOB:
13
- return DefaultCasts::ReinterpretCast;
14
12
  default:
15
13
  return DefaultCasts::TryVectorNullCast;
16
14
  }
@@ -10,7 +10,6 @@ BoundCastInfo DefaultCasts::BlobCastSwitch(BindCastInput &input, const LogicalTy
10
10
  // blob to varchar
11
11
  return BoundCastInfo(&VectorCastHelpers::StringCast<string_t, duckdb::CastFromBlob>);
12
12
  case LogicalTypeId::AGGREGATE_STATE:
13
- case LogicalTypeId::BIT:
14
13
  return DefaultCasts::ReinterpretCast;
15
14
  default:
16
15
  return DefaultCasts::TryVectorNullCast;