duckdb 0.7.2-dev1734.0 → 0.7.2-dev1803.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev1734.0",
5
+ "version": "0.7.2-dev1803.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -1187,8 +1187,8 @@ idx_t StructType::GetChildCount(const LogicalType &type) {
1187
1187
  return StructType::GetChildTypes(type).size();
1188
1188
  }
1189
1189
 
1190
- LogicalType LogicalType::STRUCT(const child_list_t<LogicalType> &children) {
1191
- auto info = make_shared<StructTypeInfo>(children);
1190
+ LogicalType LogicalType::STRUCT(child_list_t<LogicalType> children) {
1191
+ auto info = make_shared<StructTypeInfo>(std::move(children));
1192
1192
  return LogicalType(LogicalTypeId::STRUCT, std::move(info));
1193
1193
  }
1194
1194
 
@@ -443,6 +443,63 @@ void FloorFun::RegisterFunction(BuiltinFunctions &set) {
443
443
  set.AddFunction(floor);
444
444
  }
445
445
 
446
+ //===--------------------------------------------------------------------===//
447
+ // trunc
448
+ //===--------------------------------------------------------------------===//
449
+ struct TruncOperator {
450
+ // Integer truncation is a NOP
451
+ template <class TA, class TR>
452
+ static inline TR Operation(TA left) {
453
+ return std::trunc(left);
454
+ }
455
+ };
456
+
457
+ struct TruncDecimalOperator {
458
+ template <class T, class POWERS_OF_TEN_CLASS>
459
+ static void Operation(DataChunk &input, uint8_t scale, Vector &result) {
460
+ T power_of_ten = POWERS_OF_TEN_CLASS::POWERS_OF_TEN[scale];
461
+ UnaryExecutor::Execute<T, T>(input.data[0], result, input.size(), [&](T input) {
462
+ // Always floor
463
+ return (input / power_of_ten);
464
+ });
465
+ }
466
+ };
467
+
468
+ void TruncFun::RegisterFunction(BuiltinFunctions &set) {
469
+ ScalarFunctionSet floor("trunc");
470
+ for (auto &type : LogicalType::Numeric()) {
471
+ scalar_function_t func = nullptr;
472
+ bind_scalar_function_t bind_func = nullptr;
473
+ // Truncation of integers gets generated by some tools (e.g., Tableau/JDBC:Postgres)
474
+ switch (type.id()) {
475
+ case LogicalTypeId::FLOAT:
476
+ func = ScalarFunction::UnaryFunction<float, float, TruncOperator>;
477
+ break;
478
+ case LogicalTypeId::DOUBLE:
479
+ func = ScalarFunction::UnaryFunction<double, double, TruncOperator>;
480
+ break;
481
+ case LogicalTypeId::DECIMAL:
482
+ bind_func = BindGenericRoundFunctionDecimal<TruncDecimalOperator>;
483
+ break;
484
+ case LogicalTypeId::TINYINT:
485
+ case LogicalTypeId::SMALLINT:
486
+ case LogicalTypeId::INTEGER:
487
+ case LogicalTypeId::BIGINT:
488
+ case LogicalTypeId::HUGEINT:
489
+ case LogicalTypeId::UTINYINT:
490
+ case LogicalTypeId::USMALLINT:
491
+ case LogicalTypeId::UINTEGER:
492
+ case LogicalTypeId::UBIGINT:
493
+ func = ScalarFunction::NopFunction;
494
+ break;
495
+ default:
496
+ throw InternalException("Unimplemented numeric type for function \"trunc\"");
497
+ }
498
+ floor.AddFunction(ScalarFunction({type}, type, func, bind_func));
499
+ }
500
+ set.AddFunction(floor);
501
+ }
502
+
446
503
  //===--------------------------------------------------------------------===//
447
504
  // round
448
505
  //===--------------------------------------------------------------------===//
@@ -10,6 +10,7 @@ void BuiltinFunctions::RegisterMathFunctions() {
10
10
  Register<CeilFun>();
11
11
  Register<FloorFun>();
12
12
  Register<RoundFun>();
13
+ Register<TruncFun>();
13
14
 
14
15
  Register<DegreesFun>();
15
16
  Register<RadiansFun>();
@@ -1,3 +1,4 @@
1
+ #include "duckdb/common/bit_utils.hpp"
1
2
  #include "duckdb/common/exception.hpp"
2
3
  #include "duckdb/common/string_util.hpp"
3
4
  #include "duckdb/common/types/blob.hpp"
@@ -7,6 +8,58 @@
7
8
 
8
9
  namespace duckdb {
9
10
 
11
+ static void WriteHexBytes(uint64_t x, char *&output, idx_t buffer_size) {
12
+ idx_t offset = buffer_size * 4;
13
+
14
+ for (; offset >= 4; offset -= 4) {
15
+ uint8_t byte = (x >> (offset - 4)) & 0x0F;
16
+ *output = Blob::HEX_TABLE[byte];
17
+ output++;
18
+ }
19
+ }
20
+
21
+ static void WriteHugeIntHexBytes(hugeint_t x, char *&output, idx_t buffer_size) {
22
+ idx_t offset = buffer_size * 4;
23
+ auto upper = x.upper;
24
+ auto lower = x.lower;
25
+
26
+ for (; offset >= 68; offset -= 4) {
27
+ uint8_t byte = (upper >> (offset - 68)) & 0x0F;
28
+ *output = Blob::HEX_TABLE[byte];
29
+ output++;
30
+ }
31
+
32
+ for (; offset >= 4; offset -= 4) {
33
+ uint8_t byte = (lower >> (offset - 4)) & 0x0F;
34
+ *output = Blob::HEX_TABLE[byte];
35
+ output++;
36
+ }
37
+ }
38
+
39
+ static void WriteBinBytes(uint64_t x, char *&output, idx_t buffer_size) {
40
+ idx_t offset = buffer_size;
41
+ for (; offset >= 1; offset -= 1) {
42
+ *output = ((x >> (offset - 1)) & 0x01) + '0';
43
+ output++;
44
+ }
45
+ }
46
+
47
+ static void WriteHugeIntBinBytes(hugeint_t x, char *&output, idx_t buffer_size) {
48
+ auto upper = x.upper;
49
+ auto lower = x.lower;
50
+ idx_t offset = buffer_size;
51
+
52
+ for (; offset >= 65; offset -= 1) {
53
+ *output = ((upper >> (offset - 65)) & 0x01) + '0';
54
+ output++;
55
+ }
56
+
57
+ for (; offset >= 1; offset -= 1) {
58
+ *output = ((lower >> (offset - 1)) & 0x01) + '0';
59
+ output++;
60
+ }
61
+ }
62
+
10
63
  struct HexStrOperator {
11
64
  template <class INPUT_TYPE, class RESULT_TYPE>
12
65
  static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
@@ -29,6 +82,149 @@ struct HexStrOperator {
29
82
  }
30
83
  };
31
84
 
85
+ struct HexIntegralOperator {
86
+ template <class INPUT_TYPE, class RESULT_TYPE>
87
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
88
+
89
+ idx_t num_leading_zero = CountZeros<uint64_t>::Leading(input);
90
+ idx_t num_bits_to_check = 64 - num_leading_zero;
91
+ D_ASSERT(num_bits_to_check <= sizeof(INPUT_TYPE) * 8);
92
+
93
+ idx_t buffer_size = (num_bits_to_check + 3) / 4;
94
+
95
+ // Special case: All bits are zero
96
+ if (buffer_size == 0) {
97
+ auto target = StringVector::EmptyString(result, 1);
98
+ auto output = target.GetDataWriteable();
99
+ *output = '0';
100
+ target.Finalize();
101
+ return target;
102
+ }
103
+
104
+ D_ASSERT(buffer_size > 0);
105
+ auto target = StringVector::EmptyString(result, buffer_size);
106
+ auto output = target.GetDataWriteable();
107
+
108
+ WriteHexBytes(input, output, buffer_size);
109
+
110
+ target.Finalize();
111
+ return target;
112
+ }
113
+ };
114
+
115
+ struct HexHugeIntOperator {
116
+ template <class INPUT_TYPE, class RESULT_TYPE>
117
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
118
+
119
+ idx_t num_leading_zero = CountZeros<hugeint_t>::Leading(input);
120
+ idx_t buffer_size = sizeof(INPUT_TYPE) * 2 - (num_leading_zero / 4);
121
+
122
+ // Special case: All bits are zero
123
+ if (buffer_size == 0) {
124
+ auto target = StringVector::EmptyString(result, 1);
125
+ auto output = target.GetDataWriteable();
126
+ *output = '0';
127
+ target.Finalize();
128
+ return target;
129
+ }
130
+
131
+ D_ASSERT(buffer_size > 0);
132
+ auto target = StringVector::EmptyString(result, buffer_size);
133
+ auto output = target.GetDataWriteable();
134
+
135
+ WriteHugeIntHexBytes(input, output, buffer_size);
136
+
137
+ target.Finalize();
138
+ return target;
139
+ }
140
+ };
141
+
142
+ template <class INPUT, class OP>
143
+ static void ToHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
144
+ D_ASSERT(args.ColumnCount() == 1);
145
+ auto &input = args.data[0];
146
+ idx_t count = args.size();
147
+ UnaryExecutor::ExecuteString<INPUT, string_t, OP>(input, result, count);
148
+ }
149
+
150
+ struct BinaryStrOperator {
151
+ template <class INPUT_TYPE, class RESULT_TYPE>
152
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
153
+ auto data = input.GetDataUnsafe();
154
+ auto size = input.GetSize();
155
+
156
+ // Allocate empty space
157
+ auto target = StringVector::EmptyString(result, size * 8);
158
+ auto output = target.GetDataWriteable();
159
+
160
+ for (idx_t i = 0; i < size; ++i) {
161
+ uint8_t byte = data[i];
162
+ for (idx_t i = 8; i >= 1; --i) {
163
+ *output = ((byte >> (i - 1)) & 0x01) + '0';
164
+ output++;
165
+ }
166
+ }
167
+
168
+ target.Finalize();
169
+ return target;
170
+ }
171
+ };
172
+
173
+ struct BinaryIntegralOperator {
174
+ template <class INPUT_TYPE, class RESULT_TYPE>
175
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
176
+
177
+ idx_t num_leading_zero = CountZeros<uint64_t>::Leading(input);
178
+ idx_t num_bits_to_check = 64 - num_leading_zero;
179
+ D_ASSERT(num_bits_to_check <= sizeof(INPUT_TYPE) * 8);
180
+
181
+ idx_t buffer_size = num_bits_to_check;
182
+
183
+ // Special case: All bits are zero
184
+ if (buffer_size == 0) {
185
+ auto target = StringVector::EmptyString(result, 1);
186
+ auto output = target.GetDataWriteable();
187
+ *output = '0';
188
+ target.Finalize();
189
+ return target;
190
+ }
191
+
192
+ D_ASSERT(buffer_size > 0);
193
+ auto target = StringVector::EmptyString(result, buffer_size);
194
+ auto output = target.GetDataWriteable();
195
+
196
+ WriteBinBytes(input, output, buffer_size);
197
+
198
+ target.Finalize();
199
+ return target;
200
+ }
201
+ };
202
+
203
+ struct BinaryHugeIntOperator {
204
+ template <class INPUT_TYPE, class RESULT_TYPE>
205
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
206
+ idx_t num_leading_zero = CountZeros<hugeint_t>::Leading(input);
207
+ idx_t buffer_size = sizeof(INPUT_TYPE) * 8 - num_leading_zero;
208
+
209
+ // Special case: All bits are zero
210
+ if (buffer_size == 0) {
211
+ auto target = StringVector::EmptyString(result, 1);
212
+ auto output = target.GetDataWriteable();
213
+ *output = '0';
214
+ target.Finalize();
215
+ return target;
216
+ }
217
+
218
+ auto target = StringVector::EmptyString(result, buffer_size);
219
+ auto output = target.GetDataWriteable();
220
+
221
+ WriteHugeIntBinBytes(input, output, buffer_size);
222
+
223
+ target.Finalize();
224
+ return target;
225
+ }
226
+ };
227
+
32
228
  struct FromHexOperator {
33
229
  template <class INPUT_TYPE, class RESULT_TYPE>
34
230
  static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
@@ -66,103 +262,65 @@ struct FromHexOperator {
66
262
  }
67
263
  };
68
264
 
69
- struct HexIntegralOperator {
265
+ struct FromBinaryOperator {
70
266
  template <class INPUT_TYPE, class RESULT_TYPE>
71
267
  static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
72
- // Sufficient space for maximum length
73
- char buffer[sizeof(INPUT_TYPE) * 2];
74
- char *ptr = buffer;
75
- idx_t buffer_size = 0;
76
-
77
- bool seen_non_zero = false;
78
- for (idx_t offset = sizeof(INPUT_TYPE) * 8; offset >= 4; offset -= 4) {
79
- uint8_t byte = (input >> (offset - 4)) & 0x0F;
80
- if (byte == 0 && !seen_non_zero && offset > 4) {
81
- continue;
82
- }
83
- seen_non_zero = true;
84
- *ptr = Blob::HEX_TABLE[byte];
85
- ptr++;
86
- buffer_size++;
268
+ auto data = input.GetDataUnsafe();
269
+ auto size = input.GetSize();
270
+
271
+ if (size > NumericLimits<uint32_t>::Maximum()) {
272
+ throw InvalidInputException("Binary input length larger than 2^32 are not supported");
87
273
  }
88
274
 
275
+ D_ASSERT(size <= NumericLimits<uint32_t>::Maximum());
276
+ auto buffer_size = (size + 7) / 8;
277
+
89
278
  // Allocate empty space
90
279
  auto target = StringVector::EmptyString(result, buffer_size);
91
280
  auto output = target.GetDataWriteable();
92
- memcpy(output, buffer, buffer_size);
93
-
94
- target.Finalize();
95
- return target;
96
- }
97
- };
98
281
 
99
- struct HexHugeIntOperator {
100
- template <class INPUT_TYPE, class RESULT_TYPE>
101
- static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
102
- char buffer[sizeof(INPUT_TYPE) * 2];
103
- char *ptr = buffer;
104
- idx_t buffer_size = 0;
105
-
106
- uint64_t lower = input.lower;
107
- int64_t upper = input.upper;
108
-
109
- bool seen_non_zero = false;
110
- for (idx_t offset = 64; offset >= 4; offset -= 4) {
111
- uint8_t byte = (upper >> (offset - 4)) & 0x0F;
112
-
113
- if (byte == 0 && !seen_non_zero) {
114
- continue;
282
+ // Treated as a single byte
283
+ idx_t i = 0;
284
+ if (size % 8 != 0) {
285
+ uint8_t byte = 0;
286
+ for (idx_t j = size % 8; j > 0; --j) {
287
+ byte |= StringUtil::GetBinaryValue(data[i]) << (j - 1);
288
+ i++;
115
289
  }
116
- seen_non_zero = true;
117
- *ptr = Blob::HEX_TABLE[byte];
118
- ptr++;
119
- buffer_size++;
290
+ *output = byte;
291
+ output++;
120
292
  }
121
293
 
122
- for (idx_t offset = 64; offset >= 4; offset -= 4) {
123
- uint8_t byte = (lower >> (offset - 4)) & 0x0F;
124
-
125
- // at least one byte space
126
- if (byte == 0 && !seen_non_zero && offset > 4) {
127
- continue;
294
+ while (i < size) {
295
+ uint8_t byte = 0;
296
+ for (idx_t j = 8; j > 0; --j) {
297
+ byte |= StringUtil::GetBinaryValue(data[i]) << (j - 1);
298
+ i++;
128
299
  }
129
- seen_non_zero = true;
130
- *ptr = Blob::HEX_TABLE[byte];
131
- ptr++;
132
- buffer_size++;
300
+ *output = byte;
301
+ output++;
133
302
  }
134
303
 
135
- // Allocate empty space
136
- auto target = StringVector::EmptyString(result, buffer_size);
137
- auto output = target.GetDataWriteable();
138
- memcpy(output, buffer, buffer_size);
139
-
140
304
  target.Finalize();
141
305
  return target;
142
306
  }
143
307
  };
144
308
 
145
- static void ToHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
309
+ template <class INPUT, class OP>
310
+ static void ToBinaryFunction(DataChunk &args, ExpressionState &state, Vector &result) {
146
311
  D_ASSERT(args.ColumnCount() == 1);
147
312
  auto &input = args.data[0];
148
313
  idx_t count = args.size();
314
+ UnaryExecutor::ExecuteString<INPUT, string_t, OP>(input, result, count);
315
+ }
149
316
 
150
- switch (input.GetType().InternalType()) {
151
- case PhysicalType::VARCHAR:
152
- UnaryExecutor::ExecuteString<string_t, string_t, HexStrOperator>(input, result, count);
153
- break;
154
- case PhysicalType::INT64:
155
- UnaryExecutor::ExecuteString<int64_t, string_t, HexIntegralOperator>(input, result, count);
156
- break;
157
- case PhysicalType::INT128:
158
- UnaryExecutor::ExecuteString<hugeint_t, string_t, HexHugeIntOperator>(input, result, count);
159
- break;
160
- case PhysicalType::UINT64:
161
- UnaryExecutor::ExecuteString<uint64_t, string_t, HexIntegralOperator>(input, result, count);
162
- break;
163
- default:
164
- throw NotImplementedException("Specifier type not implemented");
165
- }
317
+ static void FromBinaryFunction(DataChunk &args, ExpressionState &state, Vector &result) {
318
+ D_ASSERT(args.ColumnCount() == 1);
319
+ D_ASSERT(args.data[0].GetType().InternalType() == PhysicalType::VARCHAR);
320
+ auto &input = args.data[0];
321
+ idx_t count = args.size();
322
+
323
+ UnaryExecutor::ExecuteString<string_t, string_t, FromBinaryOperator>(input, result, count);
166
324
  }
167
325
 
168
326
  static void FromHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
@@ -178,13 +336,17 @@ void HexFun::RegisterFunction(BuiltinFunctions &set) {
178
336
  ScalarFunctionSet to_hex("to_hex");
179
337
  ScalarFunctionSet from_hex("from_hex");
180
338
 
181
- to_hex.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToHexFunction));
339
+ to_hex.AddFunction(
340
+ ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToHexFunction<string_t, HexStrOperator>));
182
341
 
183
- to_hex.AddFunction(ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, ToHexFunction));
342
+ to_hex.AddFunction(
343
+ ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, ToHexFunction<int64_t, HexIntegralOperator>));
184
344
 
185
- to_hex.AddFunction(ScalarFunction({LogicalType::UBIGINT}, LogicalType::VARCHAR, ToHexFunction));
345
+ to_hex.AddFunction(
346
+ ScalarFunction({LogicalType::UBIGINT}, LogicalType::VARCHAR, ToHexFunction<uint64_t, HexIntegralOperator>));
186
347
 
187
- to_hex.AddFunction(ScalarFunction({LogicalType::HUGEINT}, LogicalType::VARCHAR, ToHexFunction));
348
+ to_hex.AddFunction(
349
+ ScalarFunction({LogicalType::HUGEINT}, LogicalType::VARCHAR, ToHexFunction<hugeint_t, HexHugeIntOperator>));
188
350
 
189
351
  from_hex.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::BLOB, FromHexFunction));
190
352
 
@@ -196,6 +358,27 @@ void HexFun::RegisterFunction(BuiltinFunctions &set) {
196
358
  from_hex.name = "unhex";
197
359
  set.AddFunction(to_hex);
198
360
  set.AddFunction(from_hex);
361
+
362
+ ScalarFunctionSet to_binary("to_binary");
363
+ ScalarFunctionSet from_binary("from_binary");
364
+ to_binary.AddFunction(
365
+ ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToBinaryFunction<string_t, BinaryStrOperator>));
366
+ to_binary.AddFunction(ScalarFunction({LogicalType::UBIGINT}, LogicalType::VARCHAR,
367
+ ToBinaryFunction<uint64_t, BinaryIntegralOperator>));
368
+ to_binary.AddFunction(
369
+ ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, ToBinaryFunction<int64_t, BinaryIntegralOperator>));
370
+ to_binary.AddFunction(ScalarFunction({LogicalType::HUGEINT}, LogicalType::VARCHAR,
371
+ ToBinaryFunction<hugeint_t, BinaryHugeIntOperator>));
372
+
373
+ from_binary.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::BLOB, FromBinaryFunction));
374
+
375
+ set.AddFunction(to_binary);
376
+ set.AddFunction(from_binary);
377
+
378
+ to_binary.name = "bin";
379
+ from_binary.name = "unbin";
380
+ set.AddFunction(to_binary);
381
+ set.AddFunction(from_binary);
199
382
  }
200
383
 
201
384
  } // namespace duckdb
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev1734"
2
+ #define DUCKDB_VERSION "0.7.2-dev1803"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "e8610c85fb"
5
+ #define DUCKDB_SOURCE_ID "cc6160599b"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -0,0 +1,147 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/bit_utils.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/hugeint.hpp"
12
+
13
+ #ifdef _MSC_VER
14
+ #define __restrict__
15
+ #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
16
+ #define __ORDER_LITTLE_ENDIAN__ 2
17
+ #include <intrin.h>
18
+ static inline int __builtin_ctzll(unsigned long long x) {
19
+ #ifdef _WIN64
20
+ unsigned long ret;
21
+ _BitScanForward64(&ret, x);
22
+ return (int)ret;
23
+ #else
24
+ unsigned long low, high;
25
+ bool low_set = _BitScanForward(&low, (unsigned __int32)(x)) != 0;
26
+ _BitScanForward(&high, (unsigned __int32)(x >> 32));
27
+ high += 32;
28
+ return low_set ? low : high;
29
+ #endif
30
+ }
31
+ static inline int __builtin_clzll(unsigned long long mask) {
32
+ unsigned long where;
33
+ // BitScanReverse scans from MSB to LSB for first set bit.
34
+ // Returns 0 if no set bit is found.
35
+ #if defined(_WIN64)
36
+ if (_BitScanReverse64(&where, mask))
37
+ return static_cast<int>(63 - where);
38
+ #elif defined(_WIN32)
39
+ // Scan the high 32 bits.
40
+ if (_BitScanReverse(&where, static_cast<unsigned long>(mask >> 32)))
41
+ return static_cast<int>(63 - (where + 32)); // Create a bit offset from the MSB.
42
+ // Scan the low 32 bits.
43
+ if (_BitScanReverse(&where, static_cast<unsigned long>(mask)))
44
+ return static_cast<int>(63 - where);
45
+ #else
46
+ #error "Implementation of __builtin_clzll required"
47
+ #endif
48
+ return 64; // Undefined Behavior.
49
+ }
50
+
51
+ static inline int __builtin_ctz(unsigned int value) {
52
+ unsigned long trailing_zero = 0;
53
+
54
+ if (_BitScanForward(&trailing_zero, value)) {
55
+ return trailing_zero;
56
+ } else {
57
+ // This is undefined, I better choose 32 than 0
58
+ return 32;
59
+ }
60
+ }
61
+
62
+ static inline int __builtin_clz(unsigned int value) {
63
+ unsigned long leading_zero = 0;
64
+
65
+ if (_BitScanReverse(&leading_zero, value)) {
66
+ return 31 - leading_zero;
67
+ } else {
68
+ // Same remarks as above
69
+ return 32;
70
+ }
71
+ }
72
+
73
+ #endif
74
+
75
+ namespace duckdb {
76
+
77
+ template <class T>
78
+ struct CountZeros {};
79
+
80
+ template <>
81
+ struct CountZeros<uint32_t> {
82
+ inline static int Leading(uint32_t value) {
83
+ if (!value) {
84
+ return 32;
85
+ }
86
+ return __builtin_clz(value);
87
+ }
88
+ inline static int Trailing(uint32_t value) {
89
+ if (!value) {
90
+ return 32;
91
+ }
92
+ return __builtin_ctz(value);
93
+ }
94
+ };
95
+
96
+ template <>
97
+ struct CountZeros<uint64_t> {
98
+ inline static int Leading(uint64_t value) {
99
+ if (!value) {
100
+ return 64;
101
+ }
102
+ return __builtin_clzll(value);
103
+ }
104
+ inline static int Trailing(uint64_t value) {
105
+ if (!value) {
106
+ return 64;
107
+ }
108
+ return __builtin_ctzll(value);
109
+ }
110
+ };
111
+
112
+ template <>
113
+ struct CountZeros<hugeint_t> {
114
+ inline static int Leading(hugeint_t value) {
115
+ if (value == 0) {
116
+ return 128;
117
+ }
118
+
119
+ uint64_t upper = (uint64_t)value.upper;
120
+ uint64_t lower = value.lower;
121
+
122
+ int res = __builtin_clzll(upper);
123
+ if (res == 64) {
124
+ res += __builtin_clzll(lower);
125
+ }
126
+
127
+ return res;
128
+ }
129
+
130
+ inline static int Trailing(hugeint_t value) {
131
+ if (value == 0) {
132
+ return 128;
133
+ }
134
+
135
+ uint64_t upper = (uint64_t)value.upper;
136
+ uint64_t lower = value.lower;
137
+
138
+ int res = __builtin_ctzll(lower);
139
+ if (res == 64) {
140
+ res += __builtin_ctzll(upper);
141
+ }
142
+
143
+ return res;
144
+ }
145
+ };
146
+
147
+ } // namespace duckdb
@@ -1,6 +1,7 @@
1
1
  #pragma once
2
2
 
3
3
  #include "duckdb/common/winapi.hpp"
4
+ #include "duckdb/common/string.hpp"
4
5
  #include <stdint.h>
5
6
 
6
7
  namespace duckdb {
@@ -38,6 +38,13 @@ public:
38
38
  throw InvalidInputException("Invalid input for hex digit: %s", string(c, 1));
39
39
  }
40
40
 
41
+ static uint8_t GetBinaryValue(char c) {
42
+ if (c >= '0' && c <= '1') {
43
+ return c - '0';
44
+ }
45
+ throw InvalidInputException("Invalid input for binary digit: %s", string(c, 1));
46
+ }
47
+
41
48
  DUCKDB_API static bool CharacterIsSpace(char c) {
42
49
  return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
43
50
  }
@@ -387,7 +387,7 @@ public:
387
387
  DUCKDB_API static LogicalType DECIMAL(int width, int scale); // NOLINT
388
388
  DUCKDB_API static LogicalType VARCHAR_COLLATION(string collation); // NOLINT
389
389
  DUCKDB_API static LogicalType LIST(const LogicalType &child); // NOLINT
390
- DUCKDB_API static LogicalType STRUCT(const child_list_t<LogicalType> &children); // NOLINT
390
+ DUCKDB_API static LogicalType STRUCT(child_list_t<LogicalType> children); // NOLINT
391
391
  DUCKDB_API static LogicalType AGGREGATE_STATE(aggregate_state_t state_type); // NOLINT
392
392
  DUCKDB_API static LogicalType MAP(const LogicalType &child); // NOLINT
393
393
  DUCKDB_API static LogicalType MAP( child_list_t<LogicalType> children); // NOLINT
@@ -30,6 +30,10 @@ struct FloorFun {
30
30
  static void RegisterFunction(BuiltinFunctions &set);
31
31
  };
32
32
 
33
+ struct TruncFun {
34
+ static void RegisterFunction(BuiltinFunctions &set);
35
+ };
36
+
33
37
  struct RoundFun {
34
38
  static void RegisterFunction(BuiltinFunctions &set);
35
39
  };
@@ -24,8 +24,9 @@ enum class BlockState : uint8_t { BLOCK_UNLOADED = 0, BLOCK_LOADED = 1 };
24
24
 
25
25
  struct BufferPoolReservation {
26
26
  idx_t size {0};
27
+ BufferPool &pool;
27
28
 
28
- BufferPoolReservation() {
29
+ BufferPoolReservation(BufferPool &pool) : pool(pool) {
29
30
  }
30
31
  BufferPoolReservation(const BufferPoolReservation &) = delete;
31
32
  BufferPoolReservation &operator=(const BufferPoolReservation &) = delete;
@@ -35,18 +36,17 @@ struct BufferPoolReservation {
35
36
 
36
37
  ~BufferPoolReservation();
37
38
 
38
- void Resize(atomic<idx_t> &counter, idx_t new_size);
39
+ void Resize(idx_t new_size);
39
40
  void Merge(BufferPoolReservation &&src);
40
41
  };
41
42
 
42
43
  struct TempBufferPoolReservation : BufferPoolReservation {
43
- atomic<idx_t> &counter;
44
- TempBufferPoolReservation(atomic<idx_t> &counter, idx_t size) : counter(counter) {
45
- Resize(counter, size);
44
+ TempBufferPoolReservation(BufferPool &pool, idx_t size) : BufferPoolReservation(pool) {
45
+ Resize(size);
46
46
  }
47
47
  TempBufferPoolReservation(TempBufferPoolReservation &&) = default;
48
48
  ~TempBufferPoolReservation() {
49
- Resize(counter, 0);
49
+ Resize(0);
50
50
  }
51
51
  };
52
52
 
@@ -39,6 +39,8 @@ public:
39
39
  //! blocks can be evicted
40
40
  void SetLimit(idx_t limit, const char *exception_postscript);
41
41
 
42
+ void IncreaseUsedMemory(idx_t size);
43
+
42
44
  idx_t GetUsedMemory();
43
45
 
44
46
  idx_t GetMaxMemory();
@@ -17,6 +17,7 @@
17
17
  #include "duckdb/common/likely.hpp"
18
18
  #include "duckdb/storage/compression/chimp/algorithm/packed_data.hpp"
19
19
  #include "duckdb/common/limits.hpp"
20
+ #include "duckdb/common/bit_utils.hpp"
20
21
 
21
22
  #include "duckdb/storage/compression/chimp/algorithm/bit_reader.hpp"
22
23
  #include "duckdb/storage/compression/chimp/algorithm/output_bit_stream.hpp"
@@ -10,68 +10,6 @@
10
10
 
11
11
  #include "duckdb.h"
12
12
 
13
- #ifdef _MSC_VER
14
- #define __restrict__
15
- #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
16
- #define __ORDER_LITTLE_ENDIAN__ 2
17
- #include <intrin.h>
18
- static inline int __builtin_ctzll(unsigned long long x) {
19
- #ifdef _WIN64
20
- unsigned long ret;
21
- _BitScanForward64(&ret, x);
22
- return (int)ret;
23
- #else
24
- unsigned long low, high;
25
- bool low_set = _BitScanForward(&low, (unsigned __int32)(x)) != 0;
26
- _BitScanForward(&high, (unsigned __int32)(x >> 32));
27
- high += 32;
28
- return low_set ? low : high;
29
- #endif
30
- }
31
- static inline int __builtin_clzll(unsigned long long mask) {
32
- unsigned long where;
33
- // BitScanReverse scans from MSB to LSB for first set bit.
34
- // Returns 0 if no set bit is found.
35
- #if defined(_WIN64)
36
- if (_BitScanReverse64(&where, mask))
37
- return static_cast<int>(63 - where);
38
- #elif defined(_WIN32)
39
- // Scan the high 32 bits.
40
- if (_BitScanReverse(&where, static_cast<unsigned long>(mask >> 32)))
41
- return static_cast<int>(63 - (where + 32)); // Create a bit offset from the MSB.
42
- // Scan the low 32 bits.
43
- if (_BitScanReverse(&where, static_cast<unsigned long>(mask)))
44
- return static_cast<int>(63 - where);
45
- #else
46
- #error "Implementation of __builtin_clzll required"
47
- #endif
48
- return 64; // Undefined Behavior.
49
- }
50
-
51
- static inline int __builtin_ctz(unsigned int value) {
52
- unsigned long trailing_zero = 0;
53
-
54
- if (_BitScanForward(&trailing_zero, value)) {
55
- return trailing_zero;
56
- } else {
57
- // This is undefined, I better choose 32 than 0
58
- return 32;
59
- }
60
- }
61
-
62
- static inline int __builtin_clz(unsigned int value) {
63
- unsigned long leading_zero = 0;
64
-
65
- if (_BitScanReverse(&leading_zero, value)) {
66
- return 31 - leading_zero;
67
- } else {
68
- // Same remarks as above
69
- return 32;
70
- }
71
- }
72
-
73
- #endif
74
-
75
13
  namespace duckdb {
76
14
 
77
15
  template <class T>
@@ -89,41 +27,6 @@ struct SignificantBits<uint32_t> {
89
27
  static constexpr uint8_t mask = ((uint8_t)1 << size) - 1;
90
28
  };
91
29
 
92
- template <class T>
93
- struct CountZeros {};
94
-
95
- template <>
96
- struct CountZeros<uint32_t> {
97
- inline static int Leading(uint32_t value) {
98
- if (!value) {
99
- return 32;
100
- }
101
- return __builtin_clz(value);
102
- }
103
- inline static int Trailing(uint32_t value) {
104
- if (!value) {
105
- return 32;
106
- }
107
- return __builtin_ctz(value);
108
- }
109
- };
110
-
111
- template <>
112
- struct CountZeros<uint64_t> {
113
- inline static int Leading(uint64_t value) {
114
- if (!value) {
115
- return 64;
116
- }
117
- return __builtin_clzll(value);
118
- }
119
- inline static int Trailing(uint64_t value) {
120
- if (!value) {
121
- return 64;
122
- }
123
- return __builtin_ctzll(value);
124
- }
125
- };
126
-
127
30
  struct ChimpConstants {
128
31
  struct Compression {
129
32
  static constexpr uint8_t LEADING_ROUND[] = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 12, 12, 12, 12,
@@ -14,6 +14,7 @@
14
14
  #include "duckdb/storage/compression/chimp/algorithm/chimp_utils.hpp"
15
15
  #include "duckdb/storage/compression/chimp/algorithm/packed_data.hpp"
16
16
  #include "duckdb/storage/compression/patas/shared.hpp"
17
+ #include "duckdb/common/bit_utils.hpp"
17
18
 
18
19
  namespace duckdb {
19
20
 
@@ -44,17 +44,18 @@ unique_ptr<Expression> RegexOptimizationRule::Apply(LogicalOperator &op, vector<
44
44
 
45
45
  if (pattern.Regexp()->op() == duckdb_re2::kRegexpLiteralString ||
46
46
  pattern.Regexp()->op() == duckdb_re2::kRegexpLiteral) {
47
- auto contains = make_uniq<BoundFunctionExpression>(root.return_type, ContainsFun::GetFunction(),
48
- std::move(root.children), nullptr);
49
47
 
50
48
  string min;
51
49
  string max;
52
- pattern.PossibleMatchRange(&min, &max, patt_str.size());
53
- if (min == max) {
54
- contains->children[1] = make_uniq<BoundConstantExpression>(Value(std::move(min)));
55
- } else {
56
- contains->children[1] = make_uniq<BoundConstantExpression>(Value(std::move(patt_str)));
50
+ pattern.PossibleMatchRange(&min, &max, patt_str.size() + 1);
51
+ if (min != max) {
52
+ return nullptr;
57
53
  }
54
+ auto parameter = make_uniq<BoundConstantExpression>(Value(std::move(min)));
55
+ auto contains = make_uniq<BoundFunctionExpression>(root.return_type, ContainsFun::GetFunction(),
56
+ std::move(root.children), nullptr);
57
+ contains->children[1] = std::move(parameter);
58
+
58
59
  return std::move(contains);
59
60
  }
60
61
  return nullptr;
@@ -9,7 +9,7 @@ namespace duckdb {
9
9
 
10
10
  BlockHandle::BlockHandle(BlockManager &block_manager, block_id_t block_id_p)
11
11
  : block_manager(block_manager), readers(0), block_id(block_id_p), buffer(nullptr), eviction_timestamp(0),
12
- can_destroy(false), unswizzled(nullptr) {
12
+ can_destroy(false), memory_charge(block_manager.buffer_manager.GetBufferPool()), unswizzled(nullptr) {
13
13
  eviction_timestamp = 0;
14
14
  state = BlockState::BLOCK_UNLOADED;
15
15
  memory_usage = Storage::BLOCK_ALLOC_SIZE;
@@ -18,7 +18,7 @@ BlockHandle::BlockHandle(BlockManager &block_manager, block_id_t block_id_p)
18
18
  BlockHandle::BlockHandle(BlockManager &block_manager, block_id_t block_id_p, unique_ptr<FileBuffer> buffer_p,
19
19
  bool can_destroy_p, idx_t block_size, BufferPoolReservation &&reservation)
20
20
  : block_manager(block_manager), readers(0), block_id(block_id_p), eviction_timestamp(0), can_destroy(can_destroy_p),
21
- unswizzled(nullptr) {
21
+ memory_charge(block_manager.buffer_manager.GetBufferPool()), unswizzled(nullptr) {
22
22
  buffer = std::move(buffer_p);
23
23
  state = BlockState::BLOCK_LOADED;
24
24
  memory_usage = block_size;
@@ -34,7 +34,7 @@ BlockHandle::~BlockHandle() { // NOLINT: allow internal exceptions
34
34
  D_ASSERT(memory_charge.size > 0);
35
35
  // the block is still loaded in memory: erase it
36
36
  buffer.reset();
37
- memory_charge.Resize(buffer_manager.buffer_pool.current_memory, 0);
37
+ memory_charge.Resize(0);
38
38
  } else {
39
39
  D_ASSERT(memory_charge.size == 0);
40
40
  }
@@ -97,7 +97,7 @@ unique_ptr<FileBuffer> BlockHandle::UnloadAndTakeBlock() {
97
97
  // temporary block that cannot be destroyed: write to temporary file
98
98
  block_manager.buffer_manager.WriteTemporaryBuffer(block_id, *buffer);
99
99
  }
100
- memory_charge.Resize(block_manager.buffer_manager.buffer_pool.current_memory, 0);
100
+ memory_charge.Resize(0);
101
101
  state = BlockState::BLOCK_UNLOADED;
102
102
  return std::move(buffer);
103
103
  }
@@ -50,6 +50,10 @@ void BufferPool::AddToEvictionQueue(shared_ptr<BlockHandle> &handle) {
50
50
  queue->q.enqueue(BufferEvictionNode(weak_ptr<BlockHandle>(handle), handle->eviction_timestamp));
51
51
  }
52
52
 
53
+ void BufferPool::IncreaseUsedMemory(idx_t size) {
54
+ current_memory += size;
55
+ }
56
+
53
57
  idx_t BufferPool::GetUsedMemory() {
54
58
  return current_memory;
55
59
  }
@@ -60,12 +64,12 @@ idx_t BufferPool::GetMaxMemory() {
60
64
  BufferPool::EvictionResult BufferPool::EvictBlocks(idx_t extra_memory, idx_t memory_limit,
61
65
  unique_ptr<FileBuffer> *buffer) {
62
66
  BufferEvictionNode node;
63
- TempBufferPoolReservation r(current_memory, extra_memory);
67
+ TempBufferPoolReservation r(*this, extra_memory);
64
68
  while (current_memory > memory_limit) {
65
69
  // get a block to unpin from the queue
66
70
  if (!queue->q.try_dequeue(node)) {
67
71
  // Failed to reserve. Adjust size of temp reservation to 0.
68
- r.Resize(current_memory, 0);
72
+ r.Resize(0);
69
73
  return {false, std::move(r)};
70
74
  }
71
75
  // get a reference to the underlying block pointer
@@ -1,8 +1,9 @@
1
1
  #include "duckdb/storage/buffer/block_handle.hpp"
2
+ #include "duckdb/storage/buffer/buffer_pool.hpp"
2
3
 
3
4
  namespace duckdb {
4
5
 
5
- BufferPoolReservation::BufferPoolReservation(BufferPoolReservation &&src) noexcept {
6
+ BufferPoolReservation::BufferPoolReservation(BufferPoolReservation &&src) noexcept : pool(src.pool) {
6
7
  size = src.size;
7
8
  src.size = 0;
8
9
  }
@@ -17,10 +18,9 @@ BufferPoolReservation::~BufferPoolReservation() {
17
18
  D_ASSERT(size == 0);
18
19
  }
19
20
 
20
- void BufferPoolReservation::Resize(atomic<idx_t> &counter, idx_t new_size) {
21
+ void BufferPoolReservation::Resize(idx_t new_size) {
21
22
  int64_t delta = (int64_t)new_size - size;
22
- D_ASSERT(delta > 0 || (int64_t)counter >= -delta);
23
- counter += delta;
23
+ pool.IncreaseUsedMemory(delta);
24
24
  size = new_size;
25
25
  }
26
26
 
@@ -131,7 +131,7 @@ void BufferManager::ReAllocate(shared_ptr<BlockHandle> &handle, idx_t block_size
131
131
  handle->memory_charge.Merge(std::move(reservation));
132
132
  } else {
133
133
  // no need to evict blocks, but we do need to decrement 'current_memory'.
134
- handle->memory_charge.Resize(buffer_pool.current_memory, req.alloc_size);
134
+ handle->memory_charge.Resize(req.alloc_size);
135
135
  }
136
136
 
137
137
  // resize and adjust current memory
@@ -163,7 +163,7 @@ BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
163
163
  if (handle->state == BlockState::BLOCK_LOADED) {
164
164
  // the block is loaded, increment the reader count and return a pointer to the handle
165
165
  handle->readers++;
166
- reservation.Resize(buffer_pool.current_memory, 0);
166
+ reservation.Resize(0);
167
167
  return handle->Load(handle);
168
168
  }
169
169
  // now we can actually load the current block
@@ -176,7 +176,7 @@ BufferHandle BufferManager::Pin(shared_ptr<BlockHandle> &handle) {
176
176
  if (delta) {
177
177
  D_ASSERT(delta < 0);
178
178
  handle->memory_usage += delta;
179
- handle->memory_charge.Resize(buffer_pool.current_memory, handle->memory_usage);
179
+ handle->memory_charge.Resize(handle->memory_usage);
180
180
  }
181
181
  D_ASSERT(handle->memory_usage == handle->buffer->AllocSize());
182
182
  return buf;
@@ -731,9 +731,9 @@ data_ptr_t BufferManager::BufferAllocatorAllocate(PrivateAllocatorData *private_
731
731
 
732
732
  void BufferManager::BufferAllocatorFree(PrivateAllocatorData *private_data, data_ptr_t pointer, idx_t size) {
733
733
  auto &data = (BufferAllocatorData &)*private_data;
734
- BufferPoolReservation r;
734
+ BufferPoolReservation r(data.manager.GetBufferPool());
735
735
  r.size = size;
736
- r.Resize(data.manager.buffer_pool.current_memory, 0);
736
+ r.Resize(0);
737
737
  return Allocator::Get(data.manager.db).FreeData(pointer, size);
738
738
  }
739
739
 
@@ -743,9 +743,9 @@ data_ptr_t BufferManager::BufferAllocatorRealloc(PrivateAllocatorData *private_d
743
743
  return pointer;
744
744
  }
745
745
  auto &data = (BufferAllocatorData &)*private_data;
746
- BufferPoolReservation r;
746
+ BufferPoolReservation r(data.manager.GetBufferPool());
747
747
  r.size = old_size;
748
- r.Resize(data.manager.buffer_pool.current_memory, size);
748
+ r.Resize(size);
749
749
  r.size = 0;
750
750
  return Allocator::Get(data.manager.db).ReallocateData(pointer, old_size, size);
751
751
  }