duckdb 0.7.2-dev1684.0 → 0.7.2-dev1803.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-datefunc.cpp +20 -8
  3. package/src/duckdb/extension/icu/icu-strptime.cpp +117 -29
  4. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -0
  5. package/src/duckdb/src/common/local_file_system.cpp +13 -2
  6. package/src/duckdb/src/common/sort/partition_state.cpp +644 -0
  7. package/src/duckdb/src/common/types.cpp +2 -2
  8. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +77 -849
  9. package/src/duckdb/src/function/scalar/math/numeric.cpp +57 -0
  10. package/src/duckdb/src/function/scalar/math_functions.cpp +1 -0
  11. package/src/duckdb/src/function/scalar/string/hex.cpp +261 -78
  12. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +2 -2
  13. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  14. package/src/duckdb/src/include/duckdb/common/bit_utils.hpp +147 -0
  15. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +1 -0
  16. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +247 -0
  17. package/src/duckdb/src/include/duckdb/common/string_util.hpp +7 -0
  18. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -1
  19. package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +4 -0
  20. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +7 -8
  21. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +79 -0
  22. package/src/duckdb/src/include/duckdb/storage/buffer/temporary_file_information.hpp +12 -0
  23. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +3 -59
  24. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/chimp128.hpp +1 -0
  25. package/src/duckdb/src/include/duckdb/storage/compression/chimp/algorithm/chimp_utils.hpp +0 -97
  26. package/src/duckdb/src/include/duckdb/storage/compression/patas/algorithm/patas.hpp +1 -0
  27. package/src/duckdb/src/main/extension/extension_install.cpp +11 -0
  28. package/src/duckdb/src/main/extension/extension_load.cpp +29 -3
  29. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +8 -7
  30. package/src/duckdb/src/storage/buffer/block_handle.cpp +128 -0
  31. package/src/duckdb/src/storage/buffer/block_manager.cpp +81 -0
  32. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +136 -0
  33. package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +32 -0
  34. package/src/duckdb/src/storage/buffer_manager.cpp +7 -358
  35. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +3 -5
  36. package/src/duckdb/ub_src_common_sort.cpp +2 -0
  37. package/src/duckdb/ub_src_storage_buffer.cpp +8 -0
@@ -0,0 +1,247 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/common/sort/partition_state.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/sort/sort.hpp"
12
+ #include "duckdb/common/types/partitioned_column_data.hpp"
13
+ #include "duckdb/common/radix_partitioning.hpp"
14
+ #include "duckdb/parallel/base_pipeline_event.hpp"
15
+
16
+ namespace duckdb {
17
+
18
+ class PartitionGlobalHashGroup {
19
+ public:
20
+ using GlobalSortStatePtr = unique_ptr<GlobalSortState>;
21
+ using LocalSortStatePtr = unique_ptr<LocalSortState>;
22
+ using Orders = vector<BoundOrderByNode>;
23
+ using Types = vector<LogicalType>;
24
+
25
+ PartitionGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
26
+ const Types &payload_types, bool external);
27
+
28
+ void ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask);
29
+
30
+ GlobalSortStatePtr global_sort;
31
+ atomic<idx_t> count;
32
+
33
+ // Mask computation
34
+ SortLayout partition_layout;
35
+ };
36
+
37
+ class PartitionGlobalSinkState {
38
+ public:
39
+ using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
40
+ using Orders = vector<BoundOrderByNode>;
41
+ using Types = vector<LogicalType>;
42
+
43
+ using GroupingPartition = unique_ptr<PartitionedColumnData>;
44
+ using GroupingAppend = unique_ptr<PartitionedColumnDataAppendState>;
45
+
46
+ PartitionGlobalSinkState(ClientContext &context, const vector<unique_ptr<Expression>> &partitions_p,
47
+ const vector<BoundOrderByNode> &orders_p, const Types &payload_types,
48
+ const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
49
+
50
+ void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
51
+ void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
52
+
53
+ void BuildSortState(ColumnDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
54
+
55
+ ClientContext &context;
56
+ BufferManager &buffer_manager;
57
+ Allocator &allocator;
58
+ mutex lock;
59
+
60
+ // OVER(PARTITION BY...) (hash grouping)
61
+ unique_ptr<RadixPartitionedColumnData> grouping_data;
62
+ //! Payload plus hash column
63
+ Types grouping_types;
64
+
65
+ // OVER(...) (sorting)
66
+ Orders partitions;
67
+ Orders orders;
68
+ const Types payload_types;
69
+ vector<HashGroupPtr> hash_groups;
70
+ bool external;
71
+
72
+ // OVER() (no sorting)
73
+ unique_ptr<RowDataCollection> rows;
74
+ unique_ptr<RowDataCollection> strings;
75
+
76
+ // Threading
77
+ idx_t memory_per_thread;
78
+ atomic<idx_t> count;
79
+
80
+ private:
81
+ void ResizeGroupingData(idx_t cardinality);
82
+ void SyncLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
83
+ };
84
+
85
+ class PartitionLocalSinkState {
86
+ public:
87
+ PartitionLocalSinkState(ClientContext &context, PartitionGlobalSinkState &gstate_p);
88
+
89
+ // Global state
90
+ PartitionGlobalSinkState &gstate;
91
+ Allocator &allocator;
92
+
93
+ // OVER(PARTITION BY...) (hash grouping)
94
+ ExpressionExecutor executor;
95
+ DataChunk group_chunk;
96
+ DataChunk payload_chunk;
97
+ unique_ptr<PartitionedColumnData> local_partition;
98
+ unique_ptr<PartitionedColumnDataAppendState> local_append;
99
+
100
+ // OVER(...) (sorting)
101
+ size_t sort_cols;
102
+
103
+ // OVER() (no sorting)
104
+ RowLayout payload_layout;
105
+ unique_ptr<RowDataCollection> rows;
106
+ unique_ptr<RowDataCollection> strings;
107
+
108
+ //! Compute the hash values
109
+ void Hash(DataChunk &input_chunk, Vector &hash_vector);
110
+ //! Sink an input chunk
111
+ void Sink(DataChunk &input_chunk);
112
+ //! Merge the state into the global state.
113
+ void Combine();
114
+ };
115
+
116
//! Stage tag used by the partition merge states. The enumerators keep their implicit values
//! (INIT = 0 through SORTED = 3) and the one-byte underlying type.
enum class PartitionSortStage : uint8_t {
	INIT,
	PREPARE,
	MERGE,
	SORTED
};
117
+
118
+ class PartitionLocalMergeState;
119
+
120
+ class PartitionGlobalMergeState {
121
+ public:
122
+ using GroupDataPtr = unique_ptr<ColumnDataCollection>;
123
+
124
+ explicit PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data);
125
+
126
+ bool IsSorted() const {
127
+ lock_guard<mutex> guard(lock);
128
+ return stage == PartitionSortStage::SORTED;
129
+ }
130
+
131
+ bool AssignTask(PartitionLocalMergeState &local_state);
132
+ bool TryPrepareNextStage();
133
+ void CompleteTask();
134
+
135
+ PartitionGlobalSinkState &sink;
136
+ GroupDataPtr group_data;
137
+ PartitionGlobalHashGroup *hash_group;
138
+ GlobalSortState *global_sort;
139
+
140
+ private:
141
+ mutable mutex lock;
142
+ PartitionSortStage stage;
143
+ idx_t total_tasks;
144
+ idx_t tasks_assigned;
145
+ idx_t tasks_completed;
146
+ };
147
+
148
+ class PartitionLocalMergeState {
149
+ public:
150
+ PartitionLocalMergeState() : merge_state(nullptr), stage(PartitionSortStage::INIT) {
151
+ finished = true;
152
+ }
153
+
154
+ bool TaskFinished() {
155
+ return finished;
156
+ }
157
+
158
+ void Prepare();
159
+ void Merge();
160
+
161
+ void ExecuteTask();
162
+
163
+ PartitionGlobalMergeState *merge_state;
164
+ PartitionSortStage stage;
165
+ atomic<bool> finished;
166
+ };
167
+
168
+ class PartitionGlobalMergeStates {
169
+ public:
170
+ using PartitionGlobalMergeStatePtr = unique_ptr<PartitionGlobalMergeState>;
171
+
172
+ explicit PartitionGlobalMergeStates(PartitionGlobalSinkState &sink);
173
+
174
+ vector<PartitionGlobalMergeStatePtr> states;
175
+ };
176
+
177
+ class PartitionMergeEvent : public BasePipelineEvent {
178
+ public:
179
+ PartitionMergeEvent(PartitionGlobalSinkState &gstate_p, Pipeline &pipeline_p)
180
+ : BasePipelineEvent(pipeline_p), gstate(gstate_p), merge_states(gstate_p) {
181
+ }
182
+
183
+ PartitionGlobalSinkState &gstate;
184
+ PartitionGlobalMergeStates merge_states;
185
+
186
+ public:
187
+ void Schedule() override;
188
+ };
189
+
190
+ class PartitionGlobalSourceState {
191
+ public:
192
+ explicit PartitionGlobalSourceState(PartitionGlobalSinkState &gsink_p) : gsink(gsink_p), next_bin(0) {
193
+ }
194
+
195
+ PartitionGlobalSinkState &gsink;
196
+ //! The output read position.
197
+ atomic<idx_t> next_bin;
198
+
199
+ public:
200
+ idx_t MaxThreads() {
201
+ // If there is only one partition, we have to process it on one thread.
202
+ if (!gsink.grouping_data) {
203
+ return 1;
204
+ }
205
+
206
+ // If there is not a lot of data, process serially.
207
+ if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
208
+ return 1;
209
+ }
210
+
211
+ return gsink.hash_groups.size();
212
+ }
213
+ };
214
+
215
+ // Per-thread read state
216
+ class PartitionLocalSourceState {
217
+ public:
218
+ using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
219
+
220
+ explicit PartitionLocalSourceState(PartitionGlobalSinkState &gstate_p);
221
+
222
+ void MaterializeSortedData();
223
+ idx_t GeneratePartition(const idx_t hash_bin);
224
+
225
+ PartitionGlobalSinkState &gstate;
226
+
227
+ //! The read partition
228
+ idx_t hash_bin;
229
+ HashGroupPtr hash_group;
230
+
231
+ //! The generated input chunks
232
+ unique_ptr<RowDataCollection> rows;
233
+ unique_ptr<RowDataCollection> heap;
234
+ RowLayout layout;
235
+ //! The partition boundary mask
236
+ vector<validity_t> partition_bits;
237
+ ValidityMask partition_mask;
238
+ //! The order boundary mask
239
+ vector<validity_t> order_bits;
240
+ ValidityMask order_mask;
241
+ //! The read cursor
242
+ unique_ptr<RowDataCollectionScanner> scanner;
243
+ //! Buffer for the inputs
244
+ DataChunk input_chunk;
245
+ };
246
+
247
+ } // namespace duckdb
@@ -38,6 +38,13 @@ public:
38
38
  throw InvalidInputException("Invalid input for hex digit: %s", string(c, 1));
39
39
  }
40
40
 
41
+ static uint8_t GetBinaryValue(char c) {
42
+ if (c >= '0' && c <= '1') {
43
+ return c - '0';
44
+ }
45
+ throw InvalidInputException("Invalid input for binary digit: %s", string(c, 1));
46
+ }
47
+
41
48
  DUCKDB_API static bool CharacterIsSpace(char c) {
42
49
  return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
43
50
  }
@@ -387,7 +387,7 @@ public:
387
387
  DUCKDB_API static LogicalType DECIMAL(int width, int scale); // NOLINT
388
388
  DUCKDB_API static LogicalType VARCHAR_COLLATION(string collation); // NOLINT
389
389
  DUCKDB_API static LogicalType LIST(const LogicalType &child); // NOLINT
390
- DUCKDB_API static LogicalType STRUCT(const child_list_t<LogicalType> &children); // NOLINT
390
+ DUCKDB_API static LogicalType STRUCT(child_list_t<LogicalType> children); // NOLINT
391
391
  DUCKDB_API static LogicalType AGGREGATE_STATE(aggregate_state_t state_type); // NOLINT
392
392
  DUCKDB_API static LogicalType MAP(const LogicalType &child); // NOLINT
393
393
  DUCKDB_API static LogicalType MAP( child_list_t<LogicalType> children); // NOLINT
@@ -30,6 +30,10 @@ struct FloorFun {
30
30
  static void RegisterFunction(BuiltinFunctions &set);
31
31
  };
32
32
 
33
+ struct TruncFun {
34
+ static void RegisterFunction(BuiltinFunctions &set);
35
+ };
36
+
33
37
  struct RoundFun {
34
38
  static void RegisterFunction(BuiltinFunctions &set);
35
39
  };
@@ -12,21 +12,21 @@
12
12
  #include "duckdb/common/common.hpp"
13
13
  #include "duckdb/common/mutex.hpp"
14
14
  #include "duckdb/storage/storage_info.hpp"
15
+ #include "duckdb/common/file_buffer.hpp"
15
16
 
16
17
  namespace duckdb {
17
18
  class BlockManager;
18
19
  class BufferHandle;
19
- class BufferManager;
20
20
  class BufferPool;
21
21
  class DatabaseInstance;
22
- class FileBuffer;
23
22
 
24
23
  enum class BlockState : uint8_t { BLOCK_UNLOADED = 0, BLOCK_LOADED = 1 };
25
24
 
26
25
  struct BufferPoolReservation {
27
26
  idx_t size {0};
27
+ BufferPool &pool;
28
28
 
29
- BufferPoolReservation() {
29
+ BufferPoolReservation(BufferPool &pool) : pool(pool) {
30
30
  }
31
31
  BufferPoolReservation(const BufferPoolReservation &) = delete;
32
32
  BufferPoolReservation &operator=(const BufferPoolReservation &) = delete;
@@ -36,18 +36,17 @@ struct BufferPoolReservation {
36
36
 
37
37
  ~BufferPoolReservation();
38
38
 
39
- void Resize(atomic<idx_t> &counter, idx_t new_size);
39
+ void Resize(idx_t new_size);
40
40
  void Merge(BufferPoolReservation &&src);
41
41
  };
42
42
 
43
43
  struct TempBufferPoolReservation : BufferPoolReservation {
44
- atomic<idx_t> &counter;
45
- TempBufferPoolReservation(atomic<idx_t> &counter, idx_t size) : counter(counter) {
46
- Resize(counter, size);
44
+ TempBufferPoolReservation(BufferPool &pool, idx_t size) : BufferPoolReservation(pool) {
45
+ Resize(size);
47
46
  }
48
47
  TempBufferPoolReservation(TempBufferPoolReservation &&) = default;
49
48
  ~TempBufferPoolReservation() {
50
- Resize(counter, 0);
49
+ Resize(0);
51
50
  }
52
51
  };
53
52
 
@@ -0,0 +1,79 @@
1
+ #pragma once
2
+
3
+ #include "duckdb/common/mutex.hpp"
4
+ #include "duckdb/common/file_buffer.hpp"
5
+ #include "duckdb/storage/buffer/block_handle.hpp"
6
+
7
+ namespace duckdb {
8
+
9
+ struct EvictionQueue;
10
+
11
+ struct BufferEvictionNode {
12
+ BufferEvictionNode() {
13
+ }
14
+ BufferEvictionNode(weak_ptr<BlockHandle> handle_p, idx_t timestamp_p)
15
+ : handle(std::move(handle_p)), timestamp(timestamp_p) {
16
+ D_ASSERT(!handle.expired());
17
+ }
18
+
19
+ weak_ptr<BlockHandle> handle;
20
+ idx_t timestamp;
21
+
22
+ bool CanUnload(BlockHandle &handle_p);
23
+
24
+ shared_ptr<BlockHandle> TryGetBlockHandle();
25
+ };
26
+
27
+ //! The BufferPool is in charge of handling memory management for one or more databases. It defines memory limits
28
+ //! and implements priority eviction among all users of the pool.
29
+ class BufferPool {
30
+ friend class BlockHandle;
31
+ friend class BlockManager;
32
+ friend class BufferManager;
33
+
34
+ public:
35
+ explicit BufferPool(idx_t maximum_memory);
36
+ virtual ~BufferPool();
37
+
38
+ //! Set a new memory limit to the buffer pool, throws an exception if the new limit is too low and not enough
39
+ //! blocks can be evicted
40
+ void SetLimit(idx_t limit, const char *exception_postscript);
41
+
42
+ void IncreaseUsedMemory(idx_t size);
43
+
44
+ idx_t GetUsedMemory();
45
+
46
+ idx_t GetMaxMemory();
47
+
48
+ protected:
49
+ //! Evict blocks until the currently used memory + extra_memory fit, returns false if this was not possible
50
+ //! (i.e. not enough blocks could be evicted)
51
+ //! If the "buffer" argument is specified AND the system can find a buffer to re-use for the given allocation size
52
+ //! "buffer" will be made to point to the re-usable memory. Note that this is not guaranteed.
53
+ //! Returns a pair. result.first indicates if eviction was successful. result.second contains the
54
+ //! reservation handle, which can be moved to the BlockHandle that will own the reservation.
55
+ struct EvictionResult {
56
+ bool success;
57
+ TempBufferPoolReservation reservation;
58
+ };
59
+ virtual EvictionResult EvictBlocks(idx_t extra_memory, idx_t memory_limit,
60
+ unique_ptr<FileBuffer> *buffer = nullptr);
61
+
62
+ //! Garbage collect eviction queue
63
+ void PurgeQueue();
64
+ void AddToEvictionQueue(shared_ptr<BlockHandle> &handle);
65
+
66
+ private:
67
+ //! The lock for changing the memory limit
68
+ mutex limit_lock;
69
+ //! The current amount of memory that is occupied by the buffer manager (in bytes)
70
+ atomic<idx_t> current_memory;
71
+ //! The maximum amount of memory that the buffer manager can keep (in bytes)
72
+ atomic<idx_t> maximum_memory;
73
+ //! Eviction queue
74
+ unique_ptr<EvictionQueue> queue;
75
+ //! Total number of insertions into the eviction queue. This guides the schedule for calling PurgeQueue.
76
+ atomic<uint32_t> queue_insertions;
77
+ };
78
+
79
+ } // namespace duckdb
@@ -0,0 +1,12 @@
1
+ #pragma once
2
+
3
+ #include "duckdb/common/common.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ struct TemporaryFileInformation {
8
+ string path;
9
+ idx_t size;
10
+ };
11
+
12
+ } // namespace duckdb
@@ -16,6 +16,8 @@
16
16
  #include "duckdb/storage/block_manager.hpp"
17
17
  #include "duckdb/storage/buffer/block_handle.hpp"
18
18
  #include "duckdb/storage/buffer/buffer_handle.hpp"
19
+ #include "duckdb/storage/buffer/buffer_pool.hpp"
20
+ #include "duckdb/storage/buffer/temporary_file_information.hpp"
19
21
 
20
22
  namespace duckdb {
21
23
  class BlockManager;
@@ -23,65 +25,7 @@ class DatabaseInstance;
23
25
  class TemporaryDirectoryHandle;
24
26
  struct EvictionQueue;
25
27
 
26
- //! The BufferPool is in charge of handling memory management for one or more databases. It defines memory limits
27
- //! and implements priority eviction among all users of the pool.
28
- class BufferPool {
29
- friend class BlockHandle;
30
- friend class BlockManager;
31
- friend class BufferManager;
32
-
33
- public:
34
- explicit BufferPool(idx_t maximum_memory);
35
- virtual ~BufferPool();
36
-
37
- //! Set a new memory limit to the buffer pool, throws an exception if the new limit is too low and not enough
38
- //! blocks can be evicted
39
- void SetLimit(idx_t limit, const char *exception_postscript);
40
-
41
- idx_t GetUsedMemory() {
42
- return current_memory;
43
- }
44
- idx_t GetMaxMemory() {
45
- return maximum_memory;
46
- }
47
-
48
- protected:
49
- //! Evict blocks until the currently used memory + extra_memory fit, returns false if this was not possible
50
- //! (i.e. not enough blocks could be evicted)
51
- //! If the "buffer" argument is specified AND the system can find a buffer to re-use for the given allocation size
52
- //! "buffer" will be made to point to the re-usable memory. Note that this is not guaranteed.
53
- //! Returns a pair. result.first indicates if eviction was successful. result.second contains the
54
- //! reservation handle, which can be moved to the BlockHandle that will own the reservation.
55
- struct EvictionResult {
56
- bool success;
57
- TempBufferPoolReservation reservation;
58
- };
59
- virtual EvictionResult EvictBlocks(idx_t extra_memory, idx_t memory_limit,
60
- unique_ptr<FileBuffer> *buffer = nullptr);
61
-
62
- //! Garbage collect eviction queue
63
- void PurgeQueue();
64
- void AddToEvictionQueue(shared_ptr<BlockHandle> &handle);
65
-
66
- private:
67
- //! The lock for changing the memory limit
68
- mutex limit_lock;
69
- //! The current amount of memory that is occupied by the buffer manager (in bytes)
70
- atomic<idx_t> current_memory;
71
- //! The maximum amount of memory that the buffer manager can keep (in bytes)
72
- atomic<idx_t> maximum_memory;
73
- //! Eviction queue
74
- unique_ptr<EvictionQueue> queue;
75
- //! Total number of insertions into the eviction queue. This guides the schedule for calling PurgeQueue.
76
- atomic<uint32_t> queue_insertions;
77
- };
78
-
79
- struct TemporaryFileInformation {
80
- string path;
81
- idx_t size;
82
- };
83
-
84
- //! The BufferManager is in charge of handling memory management for a singke database. It cooperatively shares a
28
+ //! The BufferManager is in charge of handling memory management for a single database. It cooperatively shares a
85
29
  //! BufferPool with other BufferManagers, belonging to different databases. It hands out memory buffers that can
86
30
  //! be used by the database internally, and offers configuration options specific to a database, which need not be
87
31
  //! shared by the BufferPool, including whether to support swapping temp buffers to disk, and where to swap them to.
@@ -17,6 +17,7 @@
17
17
  #include "duckdb/common/likely.hpp"
18
18
  #include "duckdb/storage/compression/chimp/algorithm/packed_data.hpp"
19
19
  #include "duckdb/common/limits.hpp"
20
+ #include "duckdb/common/bit_utils.hpp"
20
21
 
21
22
  #include "duckdb/storage/compression/chimp/algorithm/bit_reader.hpp"
22
23
  #include "duckdb/storage/compression/chimp/algorithm/output_bit_stream.hpp"
@@ -10,68 +10,6 @@
10
10
 
11
11
  #include "duckdb.h"
12
12
 
13
- #ifdef _MSC_VER
14
- #define __restrict__
15
- #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
16
- #define __ORDER_LITTLE_ENDIAN__ 2
17
- #include <intrin.h>
18
- static inline int __builtin_ctzll(unsigned long long x) {
19
- #ifdef _WIN64
20
- unsigned long ret;
21
- _BitScanForward64(&ret, x);
22
- return (int)ret;
23
- #else
24
- unsigned long low, high;
25
- bool low_set = _BitScanForward(&low, (unsigned __int32)(x)) != 0;
26
- _BitScanForward(&high, (unsigned __int32)(x >> 32));
27
- high += 32;
28
- return low_set ? low : high;
29
- #endif
30
- }
31
- static inline int __builtin_clzll(unsigned long long mask) {
32
- unsigned long where;
33
- // BitScanReverse scans from MSB to LSB for first set bit.
34
- // Returns 0 if no set bit is found.
35
- #if defined(_WIN64)
36
- if (_BitScanReverse64(&where, mask))
37
- return static_cast<int>(63 - where);
38
- #elif defined(_WIN32)
39
- // Scan the high 32 bits.
40
- if (_BitScanReverse(&where, static_cast<unsigned long>(mask >> 32)))
41
- return static_cast<int>(63 - (where + 32)); // Create a bit offset from the MSB.
42
- // Scan the low 32 bits.
43
- if (_BitScanReverse(&where, static_cast<unsigned long>(mask)))
44
- return static_cast<int>(63 - where);
45
- #else
46
- #error "Implementation of __builtin_clzll required"
47
- #endif
48
- return 64; // Undefined Behavior.
49
- }
50
-
51
- static inline int __builtin_ctz(unsigned int value) {
52
- unsigned long trailing_zero = 0;
53
-
54
- if (_BitScanForward(&trailing_zero, value)) {
55
- return trailing_zero;
56
- } else {
57
- // This is undefined, I better choose 32 than 0
58
- return 32;
59
- }
60
- }
61
-
62
- static inline int __builtin_clz(unsigned int value) {
63
- unsigned long leading_zero = 0;
64
-
65
- if (_BitScanReverse(&leading_zero, value)) {
66
- return 31 - leading_zero;
67
- } else {
68
- // Same remarks as above
69
- return 32;
70
- }
71
- }
72
-
73
- #endif
74
-
75
13
  namespace duckdb {
76
14
 
77
15
  template <class T>
@@ -89,41 +27,6 @@ struct SignificantBits<uint32_t> {
89
27
  static constexpr uint8_t mask = ((uint8_t)1 << size) - 1;
90
28
  };
91
29
 
92
- template <class T>
93
- struct CountZeros {};
94
-
95
- template <>
96
- struct CountZeros<uint32_t> {
97
- inline static int Leading(uint32_t value) {
98
- if (!value) {
99
- return 32;
100
- }
101
- return __builtin_clz(value);
102
- }
103
- inline static int Trailing(uint32_t value) {
104
- if (!value) {
105
- return 32;
106
- }
107
- return __builtin_ctz(value);
108
- }
109
- };
110
-
111
- template <>
112
- struct CountZeros<uint64_t> {
113
- inline static int Leading(uint64_t value) {
114
- if (!value) {
115
- return 64;
116
- }
117
- return __builtin_clzll(value);
118
- }
119
- inline static int Trailing(uint64_t value) {
120
- if (!value) {
121
- return 64;
122
- }
123
- return __builtin_ctzll(value);
124
- }
125
- };
126
-
127
30
  struct ChimpConstants {
128
31
  struct Compression {
129
32
  static constexpr uint8_t LEADING_ROUND[] = {0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 12, 12, 12, 12,
@@ -14,6 +14,7 @@
14
14
  #include "duckdb/storage/compression/chimp/algorithm/chimp_utils.hpp"
15
15
  #include "duckdb/storage/compression/chimp/algorithm/packed_data.hpp"
16
16
  #include "duckdb/storage/compression/patas/shared.hpp"
17
+ #include "duckdb/common/bit_utils.hpp"
17
18
 
18
19
  namespace duckdb {
19
20
 
@@ -39,6 +39,9 @@ const vector<string> ExtensionHelper::PathComponents() {
39
39
  }
40
40
 
41
41
  string ExtensionHelper::ExtensionDirectory(DBConfig &config, FileSystem &fs, FileOpener *opener) {
42
+ #ifdef WASM_LOADABLE_EXTENSIONS
43
+ static_assertion(0, "ExtensionDirectory functionality is not supported in duckdb-wasm");
44
+ #endif
42
45
  string extension_directory;
43
46
  if (!config.options.extension_directory.empty()) { // create the extension directory if not present
44
47
  extension_directory = config.options.extension_directory;
@@ -111,11 +114,19 @@ bool ExtensionHelper::CreateSuggestions(const string &extension_name, string &me
111
114
  }
112
115
 
113
116
  void ExtensionHelper::InstallExtension(DBConfig &config, FileSystem &fs, const string &extension, bool force_install) {
117
+ #ifdef WASM_LOADABLE_EXTENSIONS
118
+ // Install is currently a no-op
119
+ return;
120
+ #endif
114
121
  string local_path = ExtensionDirectory(config, fs, nullptr);
115
122
  InstallExtensionInternal(config, nullptr, fs, local_path, extension, force_install);
116
123
  }
117
124
 
118
125
  void ExtensionHelper::InstallExtension(ClientContext &context, const string &extension, bool force_install) {
126
+ #ifdef WASM_LOADABLE_EXTENSIONS
127
+ // Install is currently a no-op
128
+ return;
129
+ #endif
119
130
  auto &config = DBConfig::GetConfig(context);
120
131
  auto &fs = FileSystem::GetFileSystem(context);
121
132
  string local_path = ExtensionDirectory(context);