duckdb 0.7.2-dev3441.0 → 0.7.2-dev3546.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/configure.py +2 -0
- package/package.json +2 -2
- package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/read_json.cpp +1 -0
- package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +42 -0
- package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -0
- package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
- package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
- package/src/duckdb/src/common/constants.cpp +1 -0
- package/src/duckdb/src/common/file_system.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_serializer.cpp +4 -3
- package/src/duckdb/src/common/serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
- package/src/duckdb/src/common/string_util.cpp +2 -2
- package/src/duckdb/src/common/types/bit.cpp +2 -2
- package/src/duckdb/src/common/types/blob.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +1 -1
- package/src/duckdb/src/common/types/decimal.cpp +2 -2
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +14 -2
- package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
- package/src/duckdb/src/common/types/time.cpp +1 -1
- package/src/duckdb/src/common/types/vector.cpp +10 -10
- package/src/duckdb/src/common/types/vector_buffer.cpp +11 -3
- package/src/duckdb/src/common/types/vector_cache.cpp +5 -5
- package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
- package/src/duckdb/src/common/windows_util.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +6 -3
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -5
- package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
- package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -7
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +4 -41
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -2
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -4
- package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
- package/src/duckdb/src/function/macro_function.cpp +43 -0
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
- package/src/duckdb/src/function/scalar/strftime_format.cpp +2 -1
- package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
- package/src/duckdb/src/function/scalar_macro_function.cpp +10 -0
- package/src/duckdb/src/function/table/copy_csv.cpp +3 -7
- package/src/duckdb/src/function/table/read_csv.cpp +60 -35
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/function/table_macro_function.cpp +10 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp +0 -6
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp +0 -6
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -5
- package/src/duckdb/src/include/duckdb/common/field_writer.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -9
- package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -3
- package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +11 -6
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +16 -6
- package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +53 -22
- package/src/duckdb/src/include/duckdb/common/vector.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +27 -127
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/macro_function.hpp +7 -1
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +3 -4
- package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +7 -2
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +2 -4
- package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +5 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +4 -7
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +8 -12
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +6 -20
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +6 -18
- package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +4 -8
- package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +4 -38
- package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +5 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -10
- package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +9 -2
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +14 -6
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +42 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +0 -7
- package/src/duckdb/src/parser/parsed_data/create_info.cpp +19 -8
- package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +46 -0
- package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +56 -0
- package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +47 -0
- package/src/duckdb/src/parser/parsed_data/detach_info.cpp +34 -0
- package/src/duckdb/src/parser/parsed_data/drop_info.cpp +46 -0
- package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +24 -0
- package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +37 -0
- package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +10 -0
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +27 -9
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -1
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -7
- package/src/duckdb/src/planner/logical_operator.cpp +1 -2
- package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -25
- package/src/duckdb/src/planner/operator/logical_insert.cpp +30 -0
- package/src/duckdb/src/planner/operator/logical_simple.cpp +33 -5
- package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +6 -16
- package/src/duckdb/src/planner/planner.cpp +4 -13
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -1
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -11
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +12 -6
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
- package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/row_group.cpp +2 -2
- package/src/duckdb/src/storage/table/update_segment.cpp +7 -6
- package/src/duckdb/third_party/fsst/libfsst.cpp +1 -2
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +13 -12
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12537 -12415
- package/src/duckdb/ub_src_catalog_catalog_entry.cpp +1 -1
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +16 -0
- package/src/statement.cpp +15 -13
- package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +0 -104
@@ -124,7 +124,8 @@ public:
|
|
124
124
|
//! Turn all the vectors from the chunk into flat vectors
|
125
125
|
DUCKDB_API void Flatten();
|
126
126
|
|
127
|
-
DUCKDB_API
|
127
|
+
// FIXME: this is DUCKDB_API, might need conversion back to regular unique ptr?
|
128
|
+
DUCKDB_API unsafe_array_ptr<UnifiedVectorFormat> ToUnifiedFormat();
|
128
129
|
|
129
130
|
DUCKDB_API void Slice(const SelectionVector &sel_vector, idx_t count);
|
130
131
|
|
@@ -24,7 +24,7 @@ struct TemplatedValidityData {
|
|
24
24
|
public:
|
25
25
|
inline explicit TemplatedValidityData(idx_t count) {
|
26
26
|
auto entry_count = EntryCount(count);
|
27
|
-
owned_data =
|
27
|
+
owned_data = make_unsafe_array<V>(entry_count);
|
28
28
|
for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) {
|
29
29
|
owned_data[entry_idx] = MAX_ENTRY;
|
30
30
|
}
|
@@ -32,13 +32,13 @@ public:
|
|
32
32
|
inline TemplatedValidityData(const V *validity_mask, idx_t count) {
|
33
33
|
D_ASSERT(validity_mask);
|
34
34
|
auto entry_count = EntryCount(count);
|
35
|
-
owned_data =
|
35
|
+
owned_data = make_unsafe_array<V>(entry_count);
|
36
36
|
for (idx_t entry_idx = 0; entry_idx < entry_count; entry_idx++) {
|
37
37
|
owned_data[entry_idx] = validity_mask[entry_idx];
|
38
38
|
}
|
39
39
|
}
|
40
40
|
|
41
|
-
|
41
|
+
unsafe_array_ptr<V> owned_data;
|
42
42
|
|
43
43
|
public:
|
44
44
|
static inline idx_t EntryCount(idx_t count) {
|
@@ -71,10 +71,10 @@ public:
|
|
71
71
|
}
|
72
72
|
explicit VectorBuffer(idx_t data_size) : buffer_type(VectorBufferType::STANDARD_BUFFER) {
|
73
73
|
if (data_size > 0) {
|
74
|
-
data =
|
74
|
+
data = make_unsafe_array<data_t>(data_size);
|
75
75
|
}
|
76
76
|
}
|
77
|
-
explicit VectorBuffer(
|
77
|
+
explicit VectorBuffer(unsafe_array_ptr<data_t> data_p)
|
78
78
|
: buffer_type(VectorBufferType::STANDARD_BUFFER), data(std::move(data_p)) {
|
79
79
|
}
|
80
80
|
virtual ~VectorBuffer() {
|
@@ -87,7 +87,7 @@ public:
|
|
87
87
|
return data.get();
|
88
88
|
}
|
89
89
|
|
90
|
-
void SetData(
|
90
|
+
void SetData(unsafe_array_ptr<data_t> new_data) {
|
91
91
|
data = std::move(new_data);
|
92
92
|
}
|
93
93
|
|
@@ -120,7 +120,7 @@ public:
|
|
120
120
|
protected:
|
121
121
|
VectorBufferType buffer_type;
|
122
122
|
unique_ptr<VectorAuxiliaryData> aux_data;
|
123
|
-
|
123
|
+
unsafe_array_ptr<data_t> data;
|
124
124
|
};
|
125
125
|
|
126
126
|
//! The DictionaryBuffer holds a selection vector
|
@@ -241,12 +241,22 @@ public:
|
|
241
241
|
|
242
242
|
void PushBack(const Value &insert);
|
243
243
|
|
244
|
-
idx_t
|
245
|
-
|
244
|
+
idx_t GetSize() {
|
245
|
+
return size;
|
246
|
+
}
|
247
|
+
|
248
|
+
idx_t GetCapacity() {
|
249
|
+
return capacity;
|
250
|
+
}
|
251
|
+
|
252
|
+
void SetCapacity(idx_t new_capacity);
|
253
|
+
void SetSize(idx_t new_size);
|
246
254
|
|
247
255
|
private:
|
248
256
|
//! child vectors used for nested data
|
249
257
|
unique_ptr<Vector> child;
|
258
|
+
idx_t capacity = 0;
|
259
|
+
idx_t size = 0;
|
250
260
|
};
|
251
261
|
|
252
262
|
//! The ManagedVectorBuffer holds a buffer handle
|
@@ -2,38 +2,45 @@
|
|
2
2
|
|
3
3
|
#include "duckdb/common/exception.hpp"
|
4
4
|
#include "duckdb/common/likely.hpp"
|
5
|
+
#include "duckdb/common/memory_safety.hpp"
|
5
6
|
|
6
7
|
#include <memory>
|
7
8
|
#include <type_traits>
|
8
9
|
|
9
10
|
namespace duckdb {
|
10
11
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
12
|
+
template <class _Tp, bool SAFE = true>
|
13
|
+
class unique_ptr : public std::unique_ptr<_Tp, std::default_delete<_Tp>> {
|
14
|
+
public:
|
15
|
+
using original = std::unique_ptr<_Tp, std::default_delete<_Tp>>;
|
16
|
+
using original::original;
|
17
|
+
|
18
|
+
private:
|
19
|
+
static inline void AssertNotNull(const bool null) {
|
20
|
+
#if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
|
21
|
+
return;
|
22
|
+
#else
|
23
|
+
if (DUCKDB_UNLIKELY(null)) {
|
24
|
+
throw duckdb::InternalException("Attempted to dereference unique_ptr that is NULL!");
|
17
25
|
}
|
18
26
|
#endif
|
19
27
|
}
|
20
|
-
};
|
21
|
-
} // namespace
|
22
28
|
|
23
|
-
template <class _Tp, class _Dp = std::default_delete<_Tp>>
|
24
|
-
class unique_ptr : public std::unique_ptr<_Tp, _Dp> {
|
25
29
|
public:
|
26
|
-
using original = std::unique_ptr<_Tp, _Dp>;
|
27
|
-
using original::original;
|
28
|
-
|
29
30
|
typename std::add_lvalue_reference<_Tp>::type operator*() const {
|
30
|
-
|
31
|
-
|
31
|
+
const auto ptr = original::get();
|
32
|
+
if (MemorySafety<SAFE>::enabled) {
|
33
|
+
AssertNotNull(!ptr);
|
34
|
+
}
|
35
|
+
return *ptr;
|
32
36
|
}
|
33
37
|
|
34
38
|
typename original::pointer operator->() const {
|
35
|
-
|
36
|
-
|
39
|
+
const auto ptr = original::get();
|
40
|
+
if (MemorySafety<SAFE>::enabled) {
|
41
|
+
AssertNotNull(!ptr);
|
42
|
+
}
|
43
|
+
return ptr;
|
37
44
|
}
|
38
45
|
|
39
46
|
#ifdef DUCKDB_CLANG_TIDY
|
@@ -46,16 +53,40 @@ public:
|
|
46
53
|
}
|
47
54
|
};
|
48
55
|
|
49
|
-
template <class _Tp,
|
50
|
-
class unique_ptr<_Tp[],
|
56
|
+
template <class _Tp, bool SAFE>
|
57
|
+
class unique_ptr<_Tp[], SAFE> : public std::unique_ptr<_Tp[], std::default_delete<_Tp[]>> {
|
51
58
|
public:
|
52
|
-
using original = std::unique_ptr<_Tp[],
|
59
|
+
using original = std::unique_ptr<_Tp[], std::default_delete<_Tp[]>>;
|
53
60
|
using original::original;
|
54
61
|
|
62
|
+
private:
|
63
|
+
static inline void AssertNotNull(const bool null) {
|
64
|
+
#if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
|
65
|
+
return;
|
66
|
+
#else
|
67
|
+
if (DUCKDB_UNLIKELY(null)) {
|
68
|
+
throw duckdb::InternalException("Attempted to dereference unique_ptr that is NULL!");
|
69
|
+
}
|
70
|
+
#endif
|
71
|
+
}
|
72
|
+
|
73
|
+
public:
|
55
74
|
typename std::add_lvalue_reference<_Tp>::type operator[](size_t __i) const {
|
56
|
-
|
57
|
-
|
75
|
+
const auto ptr = original::get();
|
76
|
+
if (MemorySafety<SAFE>::enabled) {
|
77
|
+
AssertNotNull(!ptr);
|
78
|
+
}
|
79
|
+
return ptr[__i];
|
58
80
|
}
|
59
81
|
};
|
60
82
|
|
83
|
+
template <typename T>
|
84
|
+
using array_ptr = unique_ptr<T[], true>;
|
85
|
+
|
86
|
+
template <typename T>
|
87
|
+
using unsafe_array_ptr = unique_ptr<T[], false>;
|
88
|
+
|
89
|
+
template <typename T>
|
90
|
+
using unsafe_unique_ptr = unique_ptr<T, false>;
|
91
|
+
|
61
92
|
} // namespace duckdb
|
@@ -12,6 +12,7 @@
|
|
12
12
|
#include "duckdb/common/typedefs.hpp"
|
13
13
|
#include "duckdb/common/likely.hpp"
|
14
14
|
#include "duckdb/common/exception.hpp"
|
15
|
+
#include "duckdb/common/memory_safety.hpp"
|
15
16
|
#include <vector>
|
16
17
|
|
17
18
|
namespace duckdb {
|
@@ -25,6 +26,7 @@ public:
|
|
25
26
|
using const_reference = typename original::const_reference;
|
26
27
|
using reference = typename original::reference;
|
27
28
|
|
29
|
+
private:
|
28
30
|
static inline void AssertIndexInBounds(idx_t index, idx_t size) {
|
29
31
|
#if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
|
30
32
|
return;
|
@@ -35,6 +37,7 @@ public:
|
|
35
37
|
#endif
|
36
38
|
}
|
37
39
|
|
40
|
+
public:
|
38
41
|
#ifdef DUCKDB_CLANG_TIDY
|
39
42
|
// This is necessary to tell clang-tidy that it reinitializes the variable after a move
|
40
43
|
[[clang::reinitializes]]
|
@@ -55,7 +58,7 @@ public:
|
|
55
58
|
|
56
59
|
template <bool _SAFE = false>
|
57
60
|
inline typename original::reference get(typename original::size_type __n) {
|
58
|
-
if (_SAFE) {
|
61
|
+
if (MemorySafety<_SAFE>::enabled) {
|
59
62
|
AssertIndexInBounds(__n, original::size());
|
60
63
|
}
|
61
64
|
return original::operator[](__n);
|
@@ -63,7 +66,7 @@ public:
|
|
63
66
|
|
64
67
|
template <bool _SAFE = false>
|
65
68
|
inline typename original::const_reference get(typename original::size_type __n) const {
|
66
|
-
if (_SAFE) {
|
69
|
+
if (MemorySafety<_SAFE>::enabled) {
|
67
70
|
AssertIndexInBounds(__n, original::size());
|
68
71
|
}
|
69
72
|
return original::operator[](__n);
|
@@ -73,7 +73,7 @@ struct AggregateHTAppendState {
|
|
73
73
|
SelectionVector empty_vector;
|
74
74
|
SelectionVector new_groups;
|
75
75
|
Vector addresses;
|
76
|
-
|
76
|
+
unsafe_array_ptr<UnifiedVectorFormat> group_data;
|
77
77
|
DataChunk group_chunk;
|
78
78
|
|
79
79
|
TupleDataChunkState chunk_state;
|
@@ -65,12 +65,12 @@ public:
|
|
65
65
|
//! returned by the JoinHashTable::Scan function and can be used to resume a
|
66
66
|
//! probe.
|
67
67
|
struct ScanStructure {
|
68
|
-
|
68
|
+
unsafe_array_ptr<UnifiedVectorFormat> key_data;
|
69
69
|
Vector pointers;
|
70
70
|
idx_t count;
|
71
71
|
SelectionVector sel_vector;
|
72
72
|
// whether or not the given tuple has found a match
|
73
|
-
|
73
|
+
unsafe_array_ptr<bool> found_match;
|
74
74
|
JoinHashTable &ht;
|
75
75
|
bool finished;
|
76
76
|
|
@@ -212,8 +212,8 @@ private:
|
|
212
212
|
//! Insert the given set of locations into the HT with the given set of hashes
|
213
213
|
void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[], bool parallel);
|
214
214
|
|
215
|
-
idx_t PrepareKeys(DataChunk &keys,
|
216
|
-
SelectionVector &sel, bool build_side);
|
215
|
+
idx_t PrepareKeys(DataChunk &keys, unsafe_array_ptr<UnifiedVectorFormat> &key_data,
|
216
|
+
const SelectionVector *&current_sel, SelectionVector &sel, bool build_side);
|
217
217
|
|
218
218
|
//! Lock for combining data_collection when merging HTs
|
219
219
|
mutex data_lock;
|
package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp
CHANGED
@@ -68,7 +68,7 @@ private:
|
|
68
68
|
//! Build and probe statistics
|
69
69
|
PerfectHashJoinStats perfect_join_statistics;
|
70
70
|
//! Stores the occurences of each value in the build side
|
71
|
-
|
71
|
+
unsafe_array_ptr<bool> bitmap_build_idx;
|
72
72
|
//! Stores the number of unique keys in the build side
|
73
73
|
idx_t unique_keys = 0;
|
74
74
|
};
|
@@ -83,7 +83,7 @@ public:
|
|
83
83
|
//! The total number of rows in the RHS
|
84
84
|
atomic<idx_t> count;
|
85
85
|
//! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
|
86
|
-
|
86
|
+
unsafe_array_ptr<bool> found_match;
|
87
87
|
//! Memory usage per thread
|
88
88
|
idx_t memory_per_thread;
|
89
89
|
};
|
@@ -60,12 +60,12 @@ public:
|
|
60
60
|
virtual ~BufferedCSVReader() {
|
61
61
|
}
|
62
62
|
|
63
|
-
|
63
|
+
unsafe_array_ptr<char> buffer;
|
64
64
|
idx_t buffer_size;
|
65
65
|
idx_t position;
|
66
66
|
idx_t start = 0;
|
67
67
|
|
68
|
-
vector<
|
68
|
+
vector<unsafe_array_ptr<char>> cached_buffers;
|
69
69
|
|
70
70
|
unique_ptr<CSVFileHandle> file_handle;
|
71
71
|
|
@@ -11,152 +11,52 @@
|
|
11
11
|
#include "duckdb/common/file_system.hpp"
|
12
12
|
#include "duckdb/common/mutex.hpp"
|
13
13
|
#include "duckdb/common/helper.hpp"
|
14
|
+
#include "duckdb/common/allocator.hpp"
|
14
15
|
|
15
16
|
namespace duckdb {
|
17
|
+
class Allocator;
|
18
|
+
class FileSystem;
|
16
19
|
|
17
20
|
struct CSVFileHandle {
|
18
21
|
public:
|
19
|
-
|
20
|
-
|
21
|
-
can_seek = file_handle->CanSeek();
|
22
|
-
plain_file_source = file_handle->OnDiskFile() && can_seek;
|
23
|
-
file_size = file_handle->GetFileSize();
|
24
|
-
}
|
22
|
+
CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<FileHandle> file_handle_p, const string &path_p,
|
23
|
+
FileCompressionType compression, bool enable_reset = true);
|
25
24
|
|
26
|
-
|
27
|
-
return can_seek;
|
28
|
-
}
|
29
|
-
void Seek(idx_t position) {
|
30
|
-
if (!can_seek) {
|
31
|
-
throw InternalException("Cannot seek in this file");
|
32
|
-
}
|
33
|
-
file_handle->Seek(position);
|
34
|
-
}
|
35
|
-
idx_t SeekPosition() {
|
36
|
-
if (!can_seek) {
|
37
|
-
throw InternalException("Cannot seek in this file");
|
38
|
-
}
|
39
|
-
return file_handle->SeekPosition();
|
40
|
-
}
|
41
|
-
void Reset() {
|
42
|
-
if (plain_file_source) {
|
43
|
-
file_handle->Reset();
|
44
|
-
} else {
|
45
|
-
if (!reset_enabled) {
|
46
|
-
throw InternalException("Reset called but reset is not enabled for this CSV Handle");
|
47
|
-
}
|
48
|
-
read_position = 0;
|
49
|
-
}
|
50
|
-
}
|
51
|
-
bool PlainFileSource() {
|
52
|
-
return plain_file_source;
|
53
|
-
}
|
54
|
-
|
55
|
-
bool OnDiskFile() {
|
56
|
-
return file_handle->OnDiskFile();
|
57
|
-
}
|
58
|
-
|
59
|
-
idx_t FileSize() {
|
60
|
-
return file_size;
|
61
|
-
}
|
25
|
+
mutex main_mutex;
|
62
26
|
|
63
|
-
|
64
|
-
|
65
|
-
|
27
|
+
public:
|
28
|
+
bool CanSeek();
|
29
|
+
void Seek(idx_t position);
|
30
|
+
idx_t SeekPosition();
|
31
|
+
void Reset();
|
32
|
+
bool OnDiskFile();
|
66
33
|
|
67
|
-
idx_t
|
68
|
-
requested_bytes += nr_bytes;
|
69
|
-
if (!plain_file_source) {
|
70
|
-
// not a plain file source: we need to do some bookkeeping around the reset functionality
|
71
|
-
idx_t result_offset = 0;
|
72
|
-
if (read_position < buffer_size) {
|
73
|
-
// we need to read from our cached buffer
|
74
|
-
auto buffer_read_count = MinValue<idx_t>(nr_bytes, buffer_size - read_position);
|
75
|
-
memcpy(buffer, cached_buffer.get() + read_position, buffer_read_count);
|
76
|
-
result_offset += buffer_read_count;
|
77
|
-
read_position += buffer_read_count;
|
78
|
-
if (result_offset == nr_bytes) {
|
79
|
-
return nr_bytes;
|
80
|
-
}
|
81
|
-
} else if (!reset_enabled && cached_buffer) {
|
82
|
-
// reset is disabled, but we still have cached data
|
83
|
-
// we can remove any cached data
|
84
|
-
cached_buffer.reset();
|
85
|
-
buffer_size = 0;
|
86
|
-
buffer_capacity = 0;
|
87
|
-
read_position = 0;
|
88
|
-
}
|
89
|
-
// we have data left to read from the file
|
90
|
-
// read directly into the buffer
|
91
|
-
auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
|
92
|
-
file_size = file_handle->GetFileSize();
|
93
|
-
read_position += bytes_read;
|
94
|
-
if (reset_enabled) {
|
95
|
-
// if reset caching is enabled, we need to cache the bytes that we have read
|
96
|
-
if (buffer_size + bytes_read >= buffer_capacity) {
|
97
|
-
// no space; first enlarge the buffer
|
98
|
-
buffer_capacity = MaxValue<idx_t>(NextPowerOfTwo(buffer_size + bytes_read), buffer_capacity * 2);
|
34
|
+
idx_t FileSize();
|
99
35
|
|
100
|
-
|
101
|
-
if (buffer_size > 0) {
|
102
|
-
memcpy(new_buffer.get(), cached_buffer.get(), buffer_size);
|
103
|
-
}
|
104
|
-
cached_buffer = std::move(new_buffer);
|
105
|
-
}
|
106
|
-
memcpy(cached_buffer.get() + buffer_size, (char *)buffer + result_offset, bytes_read);
|
107
|
-
buffer_size += bytes_read;
|
108
|
-
}
|
36
|
+
bool FinishedReading();
|
109
37
|
|
110
|
-
|
111
|
-
} else {
|
112
|
-
return file_handle->Read(buffer, nr_bytes);
|
113
|
-
}
|
114
|
-
}
|
38
|
+
idx_t Read(void *buffer, idx_t nr_bytes);
|
115
39
|
|
116
|
-
string ReadLine()
|
117
|
-
|
118
|
-
string result;
|
119
|
-
char buffer[1];
|
120
|
-
while (true) {
|
121
|
-
idx_t bytes_read = Read(buffer, 1);
|
122
|
-
if (bytes_read == 0) {
|
123
|
-
return result;
|
124
|
-
}
|
125
|
-
if (carriage_return) {
|
126
|
-
if (buffer[0] != '\n') {
|
127
|
-
if (!file_handle->CanSeek()) {
|
128
|
-
throw BinderException(
|
129
|
-
"Carriage return newlines not supported when reading CSV files in which we cannot seek");
|
130
|
-
}
|
131
|
-
file_handle->Seek(file_handle->SeekPosition() - 1);
|
132
|
-
return result;
|
133
|
-
}
|
134
|
-
}
|
135
|
-
if (buffer[0] == '\n') {
|
136
|
-
return result;
|
137
|
-
}
|
138
|
-
if (buffer[0] != '\r') {
|
139
|
-
result += buffer[0];
|
140
|
-
} else {
|
141
|
-
carriage_return = true;
|
142
|
-
}
|
143
|
-
}
|
144
|
-
}
|
40
|
+
string ReadLine();
|
41
|
+
void DisableReset();
|
145
42
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
idx_t count = 0;
|
43
|
+
static unique_ptr<FileHandle> OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
|
44
|
+
FileCompressionType compression);
|
45
|
+
static unique_ptr<CSVFileHandle> OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
|
46
|
+
FileCompressionType compression, bool enable_reset);
|
151
47
|
|
152
48
|
private:
|
49
|
+
FileSystem &fs;
|
50
|
+
Allocator &allocator;
|
153
51
|
unique_ptr<FileHandle> file_handle;
|
52
|
+
string path;
|
53
|
+
FileCompressionType compression;
|
154
54
|
bool reset_enabled = true;
|
155
55
|
bool can_seek = false;
|
156
|
-
bool
|
56
|
+
bool on_disk_file = false;
|
157
57
|
idx_t file_size = 0;
|
158
58
|
// reset support
|
159
|
-
|
59
|
+
AllocatedData cached_buffer;
|
160
60
|
idx_t read_position = 0;
|
161
61
|
idx_t buffer_size = 0;
|
162
62
|
idx_t buffer_capacity = 0;
|
@@ -130,8 +130,7 @@ struct BufferedCSVReaderOptions {
|
|
130
130
|
std::map<LogicalTypeId, StrfTimeFormat> write_date_format = {{LogicalTypeId::DATE, {}},
|
131
131
|
{LogicalTypeId::TIMESTAMP, {}}};
|
132
132
|
//! Whether or not a type format is specified
|
133
|
-
std::map<LogicalTypeId, bool> has_format = {
|
134
|
-
{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}, {LogicalTypeId::TIMESTAMP_TZ, false}};
|
133
|
+
std::map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
|
135
134
|
|
136
135
|
void Serialize(FieldWriter &writer) const;
|
137
136
|
void Deserialize(FieldReader &reader);
|
@@ -67,7 +67,7 @@ struct CSVBufferRead {
|
|
67
67
|
} else {
|
68
68
|
// 3) It starts in the current buffer and ends in the next buffer
|
69
69
|
D_ASSERT(next_buffer);
|
70
|
-
auto intersection =
|
70
|
+
auto intersection = make_unsafe_array<char>(length);
|
71
71
|
idx_t cur_pos = 0;
|
72
72
|
auto buffer_ptr = buffer->Ptr();
|
73
73
|
for (idx_t i = start_buffer; i < buffer->GetBufferSize(); i++) {
|
@@ -85,7 +85,7 @@ struct CSVBufferRead {
|
|
85
85
|
|
86
86
|
shared_ptr<CSVBuffer> buffer;
|
87
87
|
shared_ptr<CSVBuffer> next_buffer;
|
88
|
-
vector<
|
88
|
+
vector<unsafe_array_ptr<char>> intersections;
|
89
89
|
optional_ptr<LineInfo> line_info;
|
90
90
|
|
91
91
|
idx_t buffer_start;
|
@@ -38,7 +38,7 @@ public:
|
|
38
38
|
SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override;
|
39
39
|
|
40
40
|
bool IsSink() const override {
|
41
|
-
return
|
41
|
+
return !children.empty();
|
42
42
|
}
|
43
43
|
|
44
44
|
bool ParallelSink() const override {
|
@@ -46,9 +46,9 @@ protected:
|
|
46
46
|
// The actual pointer to the data
|
47
47
|
data_ptr_t data;
|
48
48
|
//! The owned data of the HT
|
49
|
-
|
49
|
+
unsafe_array_ptr<data_t> owned_data;
|
50
50
|
//! Information on whether or not a specific group has any entries
|
51
|
-
|
51
|
+
unsafe_array_ptr<bool> group_is_set;
|
52
52
|
|
53
53
|
//! The minimum values for each of the group columns
|
54
54
|
vector<Value> group_minima;
|
@@ -113,7 +113,7 @@ private:
|
|
113
113
|
Vector statev;
|
114
114
|
|
115
115
|
//! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes
|
116
|
-
|
116
|
+
unsafe_array_ptr<data_t> levels_flat_native;
|
117
117
|
//! For each level, the starting location in the levels_flat_native array
|
118
118
|
vector<idx_t> levels_flat_start;
|
119
119
|
|
@@ -21,7 +21,7 @@ enum class MacroType : uint8_t { VOID_MACRO = 0, TABLE_MACRO = 1, SCALAR_MACRO =
|
|
21
21
|
|
22
22
|
class MacroFunction {
|
23
23
|
public:
|
24
|
-
MacroFunction(MacroType type);
|
24
|
+
explicit MacroFunction(MacroType type);
|
25
25
|
|
26
26
|
//! The type
|
27
27
|
MacroType type;
|
@@ -45,6 +45,12 @@ public:
|
|
45
45
|
|
46
46
|
virtual string ToSQL(const string &schema, const string &name) const;
|
47
47
|
|
48
|
+
void Serialize(Serializer &serializer) const;
|
49
|
+
static unique_ptr<MacroFunction> Deserialize(Deserializer &deserializer);
|
50
|
+
|
51
|
+
protected:
|
52
|
+
virtual void SerializeInternal(FieldWriter &writer) const = 0;
|
53
|
+
|
48
54
|
public:
|
49
55
|
template <class TARGET>
|
50
56
|
TARGET &Cast() {
|
@@ -66,6 +66,9 @@ public:
|
|
66
66
|
return std::find(specifiers.begin(), specifiers.end(), s) != specifiers.end();
|
67
67
|
}
|
68
68
|
|
69
|
+
//! The full format specifier, for error messages
|
70
|
+
string format_specifier;
|
71
|
+
|
69
72
|
protected:
|
70
73
|
//! The format specifiers
|
71
74
|
vector<StrTimeSpecifier> specifiers;
|
@@ -134,10 +137,6 @@ public:
|
|
134
137
|
DUCKDB_API string FormatError(string_t input, const string &format_specifier);
|
135
138
|
};
|
136
139
|
|
137
|
-
public:
|
138
|
-
//! The full format specifier, for error messages
|
139
|
-
string format_specifier;
|
140
|
-
|
141
140
|
public:
|
142
141
|
DUCKDB_API static ParseResult Parse(const string &format, const string &text);
|
143
142
|
|
@@ -23,9 +23,9 @@ public:
|
|
23
23
|
static constexpr const MacroType TYPE = MacroType::SCALAR_MACRO;
|
24
24
|
|
25
25
|
public:
|
26
|
-
ScalarMacroFunction(unique_ptr<ParsedExpression> expression);
|
27
|
-
|
26
|
+
explicit ScalarMacroFunction(unique_ptr<ParsedExpression> expression);
|
28
27
|
ScalarMacroFunction(void);
|
28
|
+
|
29
29
|
//! The macro expression
|
30
30
|
unique_ptr<ParsedExpression> expression;
|
31
31
|
|
@@ -33,6 +33,11 @@ public:
|
|
33
33
|
unique_ptr<MacroFunction> Copy() const override;
|
34
34
|
|
35
35
|
string ToSQL(const string &schema, const string &name) const override;
|
36
|
+
|
37
|
+
static unique_ptr<MacroFunction> Deserialize(FieldReader &reader);
|
38
|
+
|
39
|
+
protected:
|
40
|
+
void SerializeInternal(FieldWriter &writer) const override;
|
36
41
|
};
|
37
42
|
|
38
43
|
} // namespace duckdb
|