duckdb 0.7.2-dev3402.0 → 0.7.2-dev3515.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
- package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
- package/src/duckdb/src/common/file_system.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_serializer.cpp +4 -3
- package/src/duckdb/src/common/serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
- package/src/duckdb/src/common/string_util.cpp +2 -2
- package/src/duckdb/src/common/types/bit.cpp +2 -2
- package/src/duckdb/src/common/types/blob.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +1 -1
- package/src/duckdb/src/common/types/decimal.cpp +2 -2
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +14 -2
- package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
- package/src/duckdb/src/common/types/time.cpp +1 -1
- package/src/duckdb/src/common/types/vector.cpp +7 -7
- package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
- package/src/duckdb/src/common/windows_util.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +6 -3
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -5
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +10 -7
- package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
- package/src/duckdb/src/execution/expression_executor/execute_between.cpp +3 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -0
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -7
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +4 -41
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -4
- package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
- package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
- package/src/duckdb/src/function/table/read_csv.cpp +43 -35
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -5
- package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -9
- package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -3
- package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +11 -6
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +53 -22
- package/src/duckdb/src/include/duckdb/common/vector.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +27 -127
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +2 -4
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +5 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +9 -2
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -0
- package/src/duckdb/src/main/client_context.cpp +1 -0
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -1
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +25 -1
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +14 -6
- package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +10 -0
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +64 -42
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -1
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -11
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
- package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
- package/src/duckdb/src/storage/table/row_group.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -25
- package/src/duckdb/src/storage/table/update_segment.cpp +7 -6
- package/src/duckdb/third_party/fsst/libfsst.cpp +1 -2
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +13 -12
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +13050 -12885
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/statement.cpp +15 -13
@@ -12,6 +12,7 @@
|
|
12
12
|
#include "duckdb/common/typedefs.hpp"
|
13
13
|
#include "duckdb/common/likely.hpp"
|
14
14
|
#include "duckdb/common/exception.hpp"
|
15
|
+
#include "duckdb/common/memory_safety.hpp"
|
15
16
|
#include <vector>
|
16
17
|
|
17
18
|
namespace duckdb {
|
@@ -25,6 +26,7 @@ public:
|
|
25
26
|
using const_reference = typename original::const_reference;
|
26
27
|
using reference = typename original::reference;
|
27
28
|
|
29
|
+
private:
|
28
30
|
static inline void AssertIndexInBounds(idx_t index, idx_t size) {
|
29
31
|
#if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
|
30
32
|
return;
|
@@ -35,6 +37,7 @@ public:
|
|
35
37
|
#endif
|
36
38
|
}
|
37
39
|
|
40
|
+
public:
|
38
41
|
#ifdef DUCKDB_CLANG_TIDY
|
39
42
|
// This is necessary to tell clang-tidy that it reinitializes the variable after a move
|
40
43
|
[[clang::reinitializes]]
|
@@ -55,7 +58,7 @@ public:
|
|
55
58
|
|
56
59
|
template <bool _SAFE = false>
|
57
60
|
inline typename original::reference get(typename original::size_type __n) {
|
58
|
-
if (_SAFE) {
|
61
|
+
if (MemorySafety<_SAFE>::enabled) {
|
59
62
|
AssertIndexInBounds(__n, original::size());
|
60
63
|
}
|
61
64
|
return original::operator[](__n);
|
@@ -63,7 +66,7 @@ public:
|
|
63
66
|
|
64
67
|
template <bool _SAFE = false>
|
65
68
|
inline typename original::const_reference get(typename original::size_type __n) const {
|
66
|
-
if (_SAFE) {
|
69
|
+
if (MemorySafety<_SAFE>::enabled) {
|
67
70
|
AssertIndexInBounds(__n, original::size());
|
68
71
|
}
|
69
72
|
return original::operator[](__n);
|
@@ -73,7 +73,7 @@ struct AggregateHTAppendState {
|
|
73
73
|
SelectionVector empty_vector;
|
74
74
|
SelectionVector new_groups;
|
75
75
|
Vector addresses;
|
76
|
-
|
76
|
+
unsafe_array_ptr<UnifiedVectorFormat> group_data;
|
77
77
|
DataChunk group_chunk;
|
78
78
|
|
79
79
|
TupleDataChunkState chunk_state;
|
@@ -65,12 +65,12 @@ public:
|
|
65
65
|
//! returned by the JoinHashTable::Scan function and can be used to resume a
|
66
66
|
//! probe.
|
67
67
|
struct ScanStructure {
|
68
|
-
|
68
|
+
unsafe_array_ptr<UnifiedVectorFormat> key_data;
|
69
69
|
Vector pointers;
|
70
70
|
idx_t count;
|
71
71
|
SelectionVector sel_vector;
|
72
72
|
// whether or not the given tuple has found a match
|
73
|
-
|
73
|
+
unsafe_array_ptr<bool> found_match;
|
74
74
|
JoinHashTable &ht;
|
75
75
|
bool finished;
|
76
76
|
|
@@ -212,8 +212,8 @@ private:
|
|
212
212
|
//! Insert the given set of locations into the HT with the given set of hashes
|
213
213
|
void InsertHashes(Vector &hashes, idx_t count, data_ptr_t key_locations[], bool parallel);
|
214
214
|
|
215
|
-
idx_t PrepareKeys(DataChunk &keys,
|
216
|
-
SelectionVector &sel, bool build_side);
|
215
|
+
idx_t PrepareKeys(DataChunk &keys, unsafe_array_ptr<UnifiedVectorFormat> &key_data,
|
216
|
+
const SelectionVector *&current_sel, SelectionVector &sel, bool build_side);
|
217
217
|
|
218
218
|
//! Lock for combining data_collection when merging HTs
|
219
219
|
mutex data_lock;
|
package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp
CHANGED
@@ -68,7 +68,7 @@ private:
|
|
68
68
|
//! Build and probe statistics
|
69
69
|
PerfectHashJoinStats perfect_join_statistics;
|
70
70
|
//! Stores the occurences of each value in the build side
|
71
|
-
|
71
|
+
unsafe_array_ptr<bool> bitmap_build_idx;
|
72
72
|
//! Stores the number of unique keys in the build side
|
73
73
|
idx_t unique_keys = 0;
|
74
74
|
};
|
@@ -83,7 +83,7 @@ public:
|
|
83
83
|
//! The total number of rows in the RHS
|
84
84
|
atomic<idx_t> count;
|
85
85
|
//! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
|
86
|
-
|
86
|
+
unsafe_array_ptr<bool> found_match;
|
87
87
|
//! Memory usage per thread
|
88
88
|
idx_t memory_per_thread;
|
89
89
|
};
|
@@ -60,12 +60,12 @@ public:
|
|
60
60
|
virtual ~BufferedCSVReader() {
|
61
61
|
}
|
62
62
|
|
63
|
-
|
63
|
+
unsafe_array_ptr<char> buffer;
|
64
64
|
idx_t buffer_size;
|
65
65
|
idx_t position;
|
66
66
|
idx_t start = 0;
|
67
67
|
|
68
|
-
vector<
|
68
|
+
vector<unsafe_array_ptr<char>> cached_buffers;
|
69
69
|
|
70
70
|
unique_ptr<CSVFileHandle> file_handle;
|
71
71
|
|
@@ -11,152 +11,52 @@
|
|
11
11
|
#include "duckdb/common/file_system.hpp"
|
12
12
|
#include "duckdb/common/mutex.hpp"
|
13
13
|
#include "duckdb/common/helper.hpp"
|
14
|
+
#include "duckdb/common/allocator.hpp"
|
14
15
|
|
15
16
|
namespace duckdb {
|
17
|
+
class Allocator;
|
18
|
+
class FileSystem;
|
16
19
|
|
17
20
|
struct CSVFileHandle {
|
18
21
|
public:
|
19
|
-
|
20
|
-
|
21
|
-
can_seek = file_handle->CanSeek();
|
22
|
-
plain_file_source = file_handle->OnDiskFile() && can_seek;
|
23
|
-
file_size = file_handle->GetFileSize();
|
24
|
-
}
|
22
|
+
CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<FileHandle> file_handle_p, const string &path_p,
|
23
|
+
FileCompressionType compression, bool enable_reset = true);
|
25
24
|
|
26
|
-
|
27
|
-
return can_seek;
|
28
|
-
}
|
29
|
-
void Seek(idx_t position) {
|
30
|
-
if (!can_seek) {
|
31
|
-
throw InternalException("Cannot seek in this file");
|
32
|
-
}
|
33
|
-
file_handle->Seek(position);
|
34
|
-
}
|
35
|
-
idx_t SeekPosition() {
|
36
|
-
if (!can_seek) {
|
37
|
-
throw InternalException("Cannot seek in this file");
|
38
|
-
}
|
39
|
-
return file_handle->SeekPosition();
|
40
|
-
}
|
41
|
-
void Reset() {
|
42
|
-
if (plain_file_source) {
|
43
|
-
file_handle->Reset();
|
44
|
-
} else {
|
45
|
-
if (!reset_enabled) {
|
46
|
-
throw InternalException("Reset called but reset is not enabled for this CSV Handle");
|
47
|
-
}
|
48
|
-
read_position = 0;
|
49
|
-
}
|
50
|
-
}
|
51
|
-
bool PlainFileSource() {
|
52
|
-
return plain_file_source;
|
53
|
-
}
|
54
|
-
|
55
|
-
bool OnDiskFile() {
|
56
|
-
return file_handle->OnDiskFile();
|
57
|
-
}
|
58
|
-
|
59
|
-
idx_t FileSize() {
|
60
|
-
return file_size;
|
61
|
-
}
|
25
|
+
mutex main_mutex;
|
62
26
|
|
63
|
-
|
64
|
-
|
65
|
-
|
27
|
+
public:
|
28
|
+
bool CanSeek();
|
29
|
+
void Seek(idx_t position);
|
30
|
+
idx_t SeekPosition();
|
31
|
+
void Reset();
|
32
|
+
bool OnDiskFile();
|
66
33
|
|
67
|
-
idx_t
|
68
|
-
requested_bytes += nr_bytes;
|
69
|
-
if (!plain_file_source) {
|
70
|
-
// not a plain file source: we need to do some bookkeeping around the reset functionality
|
71
|
-
idx_t result_offset = 0;
|
72
|
-
if (read_position < buffer_size) {
|
73
|
-
// we need to read from our cached buffer
|
74
|
-
auto buffer_read_count = MinValue<idx_t>(nr_bytes, buffer_size - read_position);
|
75
|
-
memcpy(buffer, cached_buffer.get() + read_position, buffer_read_count);
|
76
|
-
result_offset += buffer_read_count;
|
77
|
-
read_position += buffer_read_count;
|
78
|
-
if (result_offset == nr_bytes) {
|
79
|
-
return nr_bytes;
|
80
|
-
}
|
81
|
-
} else if (!reset_enabled && cached_buffer) {
|
82
|
-
// reset is disabled, but we still have cached data
|
83
|
-
// we can remove any cached data
|
84
|
-
cached_buffer.reset();
|
85
|
-
buffer_size = 0;
|
86
|
-
buffer_capacity = 0;
|
87
|
-
read_position = 0;
|
88
|
-
}
|
89
|
-
// we have data left to read from the file
|
90
|
-
// read directly into the buffer
|
91
|
-
auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
|
92
|
-
file_size = file_handle->GetFileSize();
|
93
|
-
read_position += bytes_read;
|
94
|
-
if (reset_enabled) {
|
95
|
-
// if reset caching is enabled, we need to cache the bytes that we have read
|
96
|
-
if (buffer_size + bytes_read >= buffer_capacity) {
|
97
|
-
// no space; first enlarge the buffer
|
98
|
-
buffer_capacity = MaxValue<idx_t>(NextPowerOfTwo(buffer_size + bytes_read), buffer_capacity * 2);
|
34
|
+
idx_t FileSize();
|
99
35
|
|
100
|
-
|
101
|
-
if (buffer_size > 0) {
|
102
|
-
memcpy(new_buffer.get(), cached_buffer.get(), buffer_size);
|
103
|
-
}
|
104
|
-
cached_buffer = std::move(new_buffer);
|
105
|
-
}
|
106
|
-
memcpy(cached_buffer.get() + buffer_size, (char *)buffer + result_offset, bytes_read);
|
107
|
-
buffer_size += bytes_read;
|
108
|
-
}
|
36
|
+
bool FinishedReading();
|
109
37
|
|
110
|
-
|
111
|
-
} else {
|
112
|
-
return file_handle->Read(buffer, nr_bytes);
|
113
|
-
}
|
114
|
-
}
|
38
|
+
idx_t Read(void *buffer, idx_t nr_bytes);
|
115
39
|
|
116
|
-
string ReadLine()
|
117
|
-
|
118
|
-
string result;
|
119
|
-
char buffer[1];
|
120
|
-
while (true) {
|
121
|
-
idx_t bytes_read = Read(buffer, 1);
|
122
|
-
if (bytes_read == 0) {
|
123
|
-
return result;
|
124
|
-
}
|
125
|
-
if (carriage_return) {
|
126
|
-
if (buffer[0] != '\n') {
|
127
|
-
if (!file_handle->CanSeek()) {
|
128
|
-
throw BinderException(
|
129
|
-
"Carriage return newlines not supported when reading CSV files in which we cannot seek");
|
130
|
-
}
|
131
|
-
file_handle->Seek(file_handle->SeekPosition() - 1);
|
132
|
-
return result;
|
133
|
-
}
|
134
|
-
}
|
135
|
-
if (buffer[0] == '\n') {
|
136
|
-
return result;
|
137
|
-
}
|
138
|
-
if (buffer[0] != '\r') {
|
139
|
-
result += buffer[0];
|
140
|
-
} else {
|
141
|
-
carriage_return = true;
|
142
|
-
}
|
143
|
-
}
|
144
|
-
}
|
40
|
+
string ReadLine();
|
41
|
+
void DisableReset();
|
145
42
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
idx_t count = 0;
|
43
|
+
static unique_ptr<FileHandle> OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
|
44
|
+
FileCompressionType compression);
|
45
|
+
static unique_ptr<CSVFileHandle> OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
|
46
|
+
FileCompressionType compression, bool enable_reset);
|
151
47
|
|
152
48
|
private:
|
49
|
+
FileSystem &fs;
|
50
|
+
Allocator &allocator;
|
153
51
|
unique_ptr<FileHandle> file_handle;
|
52
|
+
string path;
|
53
|
+
FileCompressionType compression;
|
154
54
|
bool reset_enabled = true;
|
155
55
|
bool can_seek = false;
|
156
|
-
bool
|
56
|
+
bool on_disk_file = false;
|
157
57
|
idx_t file_size = 0;
|
158
58
|
// reset support
|
159
|
-
|
59
|
+
AllocatedData cached_buffer;
|
160
60
|
idx_t read_position = 0;
|
161
61
|
idx_t buffer_size = 0;
|
162
62
|
idx_t buffer_capacity = 0;
|
@@ -67,7 +67,7 @@ struct CSVBufferRead {
|
|
67
67
|
} else {
|
68
68
|
// 3) It starts in the current buffer and ends in the next buffer
|
69
69
|
D_ASSERT(next_buffer);
|
70
|
-
auto intersection =
|
70
|
+
auto intersection = make_unsafe_array<char>(length);
|
71
71
|
idx_t cur_pos = 0;
|
72
72
|
auto buffer_ptr = buffer->Ptr();
|
73
73
|
for (idx_t i = start_buffer; i < buffer->GetBufferSize(); i++) {
|
@@ -85,7 +85,7 @@ struct CSVBufferRead {
|
|
85
85
|
|
86
86
|
shared_ptr<CSVBuffer> buffer;
|
87
87
|
shared_ptr<CSVBuffer> next_buffer;
|
88
|
-
vector<
|
88
|
+
vector<unsafe_array_ptr<char>> intersections;
|
89
89
|
optional_ptr<LineInfo> line_info;
|
90
90
|
|
91
91
|
idx_t buffer_start;
|
@@ -46,9 +46,9 @@ protected:
|
|
46
46
|
// The actual pointer to the data
|
47
47
|
data_ptr_t data;
|
48
48
|
//! The owned data of the HT
|
49
|
-
|
49
|
+
unsafe_array_ptr<data_t> owned_data;
|
50
50
|
//! Information on whether or not a specific group has any entries
|
51
|
-
|
51
|
+
unsafe_array_ptr<bool> group_is_set;
|
52
52
|
|
53
53
|
//! The minimum values for each of the group columns
|
54
54
|
vector<Value> group_minima;
|
@@ -113,7 +113,7 @@ private:
|
|
113
113
|
Vector statev;
|
114
114
|
|
115
115
|
//! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes
|
116
|
-
|
116
|
+
unsafe_array_ptr<data_t> levels_flat_native;
|
117
117
|
//! For each level, the starting location in the levels_flat_native array
|
118
118
|
vector<idx_t> levels_flat_start;
|
119
119
|
|
@@ -55,7 +55,7 @@ struct WriteCSVData : public BaseCSVData {
|
|
55
55
|
//! The size of the CSV file (in bytes) that we buffer before we flush it to disk
|
56
56
|
idx_t flush_size = 4096 * 8;
|
57
57
|
//! For each byte whether or not the CSV file requires quotes when containing the byte
|
58
|
-
|
58
|
+
unsafe_array_ptr<bool> requires_quotes;
|
59
59
|
};
|
60
60
|
|
61
61
|
struct ColumnInfo {
|
@@ -65,7 +65,7 @@ struct ColumnInfo {
|
|
65
65
|
names = std::move(names_p);
|
66
66
|
types = std::move(types_p);
|
67
67
|
}
|
68
|
-
void Serialize(FieldWriter &writer) {
|
68
|
+
void Serialize(FieldWriter &writer) const {
|
69
69
|
writer.WriteList<string>(names);
|
70
70
|
writer.WriteRegularSerializableList<LogicalType>(types);
|
71
71
|
}
|
@@ -99,8 +99,6 @@ struct ReadCSVData : public BaseCSVData {
|
|
99
99
|
bool single_threaded = false;
|
100
100
|
//! Reader bind data
|
101
101
|
MultiFileReaderBindData reader_bind;
|
102
|
-
//! If any file is a pipe
|
103
|
-
bool is_pipe = false;
|
104
102
|
vector<ColumnInfo> column_info;
|
105
103
|
|
106
104
|
void Initialize(unique_ptr<BufferedCSVReader> &reader) {
|
@@ -27,12 +27,12 @@ struct SingleJoinRelation {
|
|
27
27
|
|
28
28
|
//! Set of relations, used in the join graph.
|
29
29
|
struct JoinRelationSet {
|
30
|
-
JoinRelationSet(
|
30
|
+
JoinRelationSet(unsafe_array_ptr<idx_t> relations, idx_t count) : relations(std::move(relations)), count(count) {
|
31
31
|
}
|
32
32
|
|
33
33
|
string ToString() const;
|
34
34
|
|
35
|
-
|
35
|
+
unsafe_array_ptr<idx_t> relations;
|
36
36
|
idx_t count;
|
37
37
|
|
38
38
|
static bool IsSubset(JoinRelationSet &super, JoinRelationSet &sub);
|
@@ -55,7 +55,7 @@ public:
|
|
55
55
|
//! Create or get a JoinRelationSet from a set of relation bindings
|
56
56
|
JoinRelationSet &GetJoinRelation(unordered_set<idx_t> &bindings);
|
57
57
|
//! Create or get a JoinRelationSet from a (sorted, duplicate-free!) list of relations
|
58
|
-
JoinRelationSet &GetJoinRelation(
|
58
|
+
JoinRelationSet &GetJoinRelation(unsafe_array_ptr<idx_t> relations, idx_t count);
|
59
59
|
//! Union two sets of relations together and create a new relation set
|
60
60
|
JoinRelationSet &Union(JoinRelationSet &left, JoinRelationSet &right);
|
61
61
|
// //! Create the set difference of left \ right (i.e. all elements in left that are not in right)
|
@@ -24,6 +24,8 @@ enum class OnConflictAction : uint8_t {
|
|
24
24
|
REPLACE // Only used in transform/bind step, changed to UPDATE later
|
25
25
|
};
|
26
26
|
|
27
|
+
enum class InsertColumnOrder : uint8_t { INSERT_BY_POSITION = 0, INSERT_BY_NAME = 1 };
|
28
|
+
|
27
29
|
class OnConflictInfo {
|
28
30
|
public:
|
29
31
|
OnConflictInfo();
|
@@ -75,6 +77,9 @@ public:
|
|
75
77
|
//! Whether or not this a DEFAULT VALUES
|
76
78
|
bool default_values = false;
|
77
79
|
|
80
|
+
//! INSERT BY POSITION or INSERT BY NAME
|
81
|
+
InsertColumnOrder column_order = InsertColumnOrder::INSERT_BY_POSITION;
|
82
|
+
|
78
83
|
protected:
|
79
84
|
InsertStatement(const InsertStatement &other);
|
80
85
|
|
@@ -39,6 +39,7 @@ class LogicalProjection;
|
|
39
39
|
class ColumnList;
|
40
40
|
class ExternalDependency;
|
41
41
|
class TableFunction;
|
42
|
+
class TableStorageInfo;
|
42
43
|
|
43
44
|
struct CreateInfo;
|
44
45
|
struct BoundCreateTableInfo;
|
@@ -167,8 +168,8 @@ public:
|
|
167
168
|
unique_ptr<LogicalOperator> BindUpdateSet(LogicalOperator &op, unique_ptr<LogicalOperator> root,
|
168
169
|
UpdateSetInfo &set_info, TableCatalogEntry &table,
|
169
170
|
vector<PhysicalIndex> &columns);
|
170
|
-
void BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
|
171
|
-
TableCatalogEntry &table);
|
171
|
+
void BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
|
172
|
+
TableCatalogEntry &table, TableStorageInfo &storage_info);
|
172
173
|
void BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &table, InsertStatement &stmt);
|
173
174
|
|
174
175
|
static void BindSchemaOrCatalog(ClientContext &context, string &catalog, string &schema);
|
@@ -20,7 +20,7 @@ struct ArenaChunk {
|
|
20
20
|
AllocatedData data;
|
21
21
|
idx_t current_position;
|
22
22
|
idx_t maximum_size;
|
23
|
-
|
23
|
+
unsafe_unique_ptr<ArenaChunk> next;
|
24
24
|
ArenaChunk *prev;
|
25
25
|
};
|
26
26
|
|
@@ -56,7 +56,7 @@ private:
|
|
56
56
|
//! Internal allocator that is used by the arena allocator
|
57
57
|
Allocator &allocator;
|
58
58
|
idx_t current_capacity;
|
59
|
-
|
59
|
+
unsafe_unique_ptr<ArenaChunk> head;
|
60
60
|
ArenaChunk *tail;
|
61
61
|
//! An allocator wrapper using this arena allocator
|
62
62
|
Allocator arena_allocator;
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/storage/storage_info.hpp"
|
12
|
+
#include "duckdb/common/file_buffer.hpp"
|
12
13
|
|
13
14
|
namespace duckdb {
|
14
15
|
class BlockHandle;
|
@@ -30,9 +31,15 @@ public:
|
|
30
31
|
//! Returns whether or not the BufferHandle is valid.
|
31
32
|
DUCKDB_API bool IsValid() const;
|
32
33
|
//! Returns a pointer to the buffer data. Handle must be valid.
|
33
|
-
|
34
|
+
inline data_ptr_t Ptr() const {
|
35
|
+
D_ASSERT(IsValid());
|
36
|
+
return node->buffer;
|
37
|
+
}
|
34
38
|
//! Returns a pointer to the buffer data. Handle must be valid.
|
35
|
-
|
39
|
+
inline data_ptr_t Ptr() {
|
40
|
+
D_ASSERT(IsValid());
|
41
|
+
return node->buffer;
|
42
|
+
}
|
36
43
|
//! Gets the underlying file buffer. Handle must be valid.
|
37
44
|
DUCKDB_API FileBuffer &GetFileBuffer();
|
38
45
|
//! Destroys the buffer handle
|
@@ -44,7 +44,7 @@ struct RowGroupAppendState {
|
|
44
44
|
//! The current row_group we are appending to
|
45
45
|
RowGroup *row_group;
|
46
46
|
//! The column append states
|
47
|
-
|
47
|
+
unsafe_array_ptr<ColumnAppendState> states;
|
48
48
|
//! Offset within the row_group
|
49
49
|
idx_t offset_in_row_group;
|
50
50
|
};
|
@@ -99,7 +99,7 @@ public:
|
|
99
99
|
//! The maximum row within the row group
|
100
100
|
idx_t max_row_group_row;
|
101
101
|
//! Child column scans
|
102
|
-
|
102
|
+
unsafe_array_ptr<ColumnScanState> column_scans;
|
103
103
|
//! Row group segment tree
|
104
104
|
RowGroupSegmentTree *row_groups;
|
105
105
|
//! The total maximum row index
|
@@ -1013,6 +1013,7 @@ void ClientContext::TryBindRelation(Relation &relation, vector<ColumnDefinition>
|
|
1013
1013
|
D_ASSERT(!relation.GetAlias().empty());
|
1014
1014
|
D_ASSERT(!relation.ToString().empty());
|
1015
1015
|
#endif
|
1016
|
+
client_data->http_state = make_uniq<HTTPState>();
|
1016
1017
|
RunFunctionInTransaction([&]() {
|
1017
1018
|
// bind the expressions
|
1018
1019
|
auto binder = Binder::CreateBinder(*this);
|
@@ -574,7 +574,7 @@ void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode &node, LogicalO
|
|
574
574
|
D_ASSERT(node.set.count == 1);
|
575
575
|
auto relation_id = node.set.relations[0];
|
576
576
|
|
577
|
-
double lowest_card_found =
|
577
|
+
double lowest_card_found = node.GetBaseTableCardinality();
|
578
578
|
for (auto &column : relation_attributes[relation_id].columns) {
|
579
579
|
auto card_after_filters = node.GetBaseTableCardinality();
|
580
580
|
ColumnBinding key = ColumnBinding(relation_id, column);
|
@@ -147,6 +147,18 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op,
|
|
147
147
|
}
|
148
148
|
}
|
149
149
|
}
|
150
|
+
if (op->type == LogicalOperatorType::LOGICAL_ANY_JOIN && non_reorderable_operation) {
|
151
|
+
auto &join = op->Cast<LogicalAnyJoin>();
|
152
|
+
if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
|
153
|
+
auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
|
154
|
+
auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
|
155
|
+
if (rhs_cardinality > lhs_cardinality * 2) {
|
156
|
+
join.join_type = JoinType::RIGHT;
|
157
|
+
std::swap(join.children[0], join.children[1]);
|
158
|
+
}
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
150
162
|
if (non_reorderable_operation) {
|
151
163
|
// we encountered a non-reordable operation (setop or non-inner join)
|
152
164
|
// we do not reorder non-inner joins yet, however we do want to expand the potential join graph around them
|
@@ -648,7 +660,19 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
|
|
648
660
|
// we have to add a cross product; we add it between the two smallest relations
|
649
661
|
optional_ptr<JoinNode> smallest_plans[2];
|
650
662
|
idx_t smallest_index[2];
|
651
|
-
|
663
|
+
D_ASSERT(join_relations.size() >= 2);
|
664
|
+
|
665
|
+
// first just add the first two join relations. It doesn't matter the cost as the JOO
|
666
|
+
// will swap them on estimated cardinality anyway.
|
667
|
+
for (idx_t i = 0; i < 2; i++) {
|
668
|
+
auto current_plan = plans[&join_relations[i].get()].get();
|
669
|
+
smallest_plans[i] = current_plan;
|
670
|
+
smallest_index[i] = i;
|
671
|
+
}
|
672
|
+
|
673
|
+
// if there are any other join relations that don't have connections
|
674
|
+
// add them if they have lower estimated cardinality.
|
675
|
+
for (idx_t i = 2; i < join_relations.size(); i++) {
|
652
676
|
// get the plan for this relation
|
653
677
|
auto current_plan = plans[&join_relations[i].get()].get();
|
654
678
|
// check if the cardinality is smaller than the smallest two found so far
|
@@ -35,7 +35,7 @@ bool JoinRelationSet::IsSubset(JoinRelationSet &super, JoinRelationSet &sub) {
|
|
35
35
|
return false;
|
36
36
|
}
|
37
37
|
|
38
|
-
JoinRelationSet &JoinRelationSetManager::GetJoinRelation(
|
38
|
+
JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unsafe_array_ptr<idx_t> relations, idx_t count) {
|
39
39
|
// now look it up in the tree
|
40
40
|
reference<JoinRelationTreeNode> info(root);
|
41
41
|
for (idx_t i = 0; i < count; i++) {
|
@@ -59,7 +59,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unique_ptr<idx_t[]> rel
|
|
59
59
|
//! Create or get a JoinRelationSet from a single node with the given index
|
60
60
|
JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
|
61
61
|
// create a sorted vector of the relations
|
62
|
-
auto relations =
|
62
|
+
auto relations = make_unsafe_array<idx_t>(1);
|
63
63
|
relations[0] = index;
|
64
64
|
idx_t count = 1;
|
65
65
|
return GetJoinRelation(std::move(relations), count);
|
@@ -67,7 +67,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
|
|
67
67
|
|
68
68
|
JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &bindings) {
|
69
69
|
// create a sorted vector of the relations
|
70
|
-
|
70
|
+
unsafe_array_ptr<idx_t> relations = bindings.empty() ? nullptr : make_unsafe_array<idx_t>(bindings.size());
|
71
71
|
idx_t count = 0;
|
72
72
|
for (auto &entry : bindings) {
|
73
73
|
relations[count++] = entry;
|
@@ -77,7 +77,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &b
|
|
77
77
|
}
|
78
78
|
|
79
79
|
JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelationSet &right) {
|
80
|
-
auto relations =
|
80
|
+
auto relations = make_unsafe_array<idx_t>(left.count + right.count);
|
81
81
|
idx_t count = 0;
|
82
82
|
// move through the left and right relations, eliminating duplicates
|
83
83
|
idx_t i = 0, j = 0;
|
@@ -113,7 +113,7 @@ JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelati
|
|
113
113
|
}
|
114
114
|
|
115
115
|
// JoinRelationSet *JoinRelationSetManager::Difference(JoinRelationSet *left, JoinRelationSet *right) {
|
116
|
-
// auto relations =
|
116
|
+
// auto relations = unsafe_array_ptr<idx_t>(new idx_t[left->count]);
|
117
117
|
// idx_t count = 0;
|
118
118
|
// // move through the left and right relations
|
119
119
|
// idx_t i = 0, j = 0;
|