duckdb 0.7.2-dev3441.0 → 0.7.2-dev3515.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
- package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
- package/src/duckdb/src/common/file_system.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
- package/src/duckdb/src/common/serializer/buffered_serializer.cpp +4 -3
- package/src/duckdb/src/common/serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
- package/src/duckdb/src/common/string_util.cpp +2 -2
- package/src/duckdb/src/common/types/bit.cpp +2 -2
- package/src/duckdb/src/common/types/blob.cpp +2 -2
- package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
- package/src/duckdb/src/common/types/date.cpp +1 -1
- package/src/duckdb/src/common/types/decimal.cpp +2 -2
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +14 -2
- package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
- package/src/duckdb/src/common/types/time.cpp +1 -1
- package/src/duckdb/src/common/types/vector.cpp +7 -7
- package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
- package/src/duckdb/src/common/windows_util.cpp +2 -2
- package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +6 -3
- package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -5
- package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
- package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
- package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -7
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +4 -41
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
- package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
- package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -4
- package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
- package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
- package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
- package/src/duckdb/src/function/table/read_csv.cpp +43 -35
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -5
- package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -9
- package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -3
- package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +11 -6
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +53 -22
- package/src/duckdb/src/include/duckdb/common/vector.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +27 -127
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +2 -4
- package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +9 -2
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
- package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +14 -6
- package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -1
- package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +10 -0
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -7
- package/src/duckdb/src/storage/arena_allocator.cpp +1 -1
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -11
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
- package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
- package/src/duckdb/src/storage/table/row_group.cpp +2 -2
- package/src/duckdb/src/storage/table/update_segment.cpp +7 -6
- package/src/duckdb/third_party/fsst/libfsst.cpp +1 -2
- package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +9 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +13 -12
- package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12537 -12415
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/statement.cpp +15 -13
@@ -11,152 +11,52 @@
|
|
11
11
|
#include "duckdb/common/file_system.hpp"
|
12
12
|
#include "duckdb/common/mutex.hpp"
|
13
13
|
#include "duckdb/common/helper.hpp"
|
14
|
+
#include "duckdb/common/allocator.hpp"
|
14
15
|
|
15
16
|
namespace duckdb {
|
17
|
+
class Allocator;
|
18
|
+
class FileSystem;
|
16
19
|
|
17
20
|
struct CSVFileHandle {
|
18
21
|
public:
|
19
|
-
|
20
|
-
|
21
|
-
can_seek = file_handle->CanSeek();
|
22
|
-
plain_file_source = file_handle->OnDiskFile() && can_seek;
|
23
|
-
file_size = file_handle->GetFileSize();
|
24
|
-
}
|
22
|
+
CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<FileHandle> file_handle_p, const string &path_p,
|
23
|
+
FileCompressionType compression, bool enable_reset = true);
|
25
24
|
|
26
|
-
|
27
|
-
return can_seek;
|
28
|
-
}
|
29
|
-
void Seek(idx_t position) {
|
30
|
-
if (!can_seek) {
|
31
|
-
throw InternalException("Cannot seek in this file");
|
32
|
-
}
|
33
|
-
file_handle->Seek(position);
|
34
|
-
}
|
35
|
-
idx_t SeekPosition() {
|
36
|
-
if (!can_seek) {
|
37
|
-
throw InternalException("Cannot seek in this file");
|
38
|
-
}
|
39
|
-
return file_handle->SeekPosition();
|
40
|
-
}
|
41
|
-
void Reset() {
|
42
|
-
if (plain_file_source) {
|
43
|
-
file_handle->Reset();
|
44
|
-
} else {
|
45
|
-
if (!reset_enabled) {
|
46
|
-
throw InternalException("Reset called but reset is not enabled for this CSV Handle");
|
47
|
-
}
|
48
|
-
read_position = 0;
|
49
|
-
}
|
50
|
-
}
|
51
|
-
bool PlainFileSource() {
|
52
|
-
return plain_file_source;
|
53
|
-
}
|
54
|
-
|
55
|
-
bool OnDiskFile() {
|
56
|
-
return file_handle->OnDiskFile();
|
57
|
-
}
|
58
|
-
|
59
|
-
idx_t FileSize() {
|
60
|
-
return file_size;
|
61
|
-
}
|
25
|
+
mutex main_mutex;
|
62
26
|
|
63
|
-
|
64
|
-
|
65
|
-
|
27
|
+
public:
|
28
|
+
bool CanSeek();
|
29
|
+
void Seek(idx_t position);
|
30
|
+
idx_t SeekPosition();
|
31
|
+
void Reset();
|
32
|
+
bool OnDiskFile();
|
66
33
|
|
67
|
-
idx_t
|
68
|
-
requested_bytes += nr_bytes;
|
69
|
-
if (!plain_file_source) {
|
70
|
-
// not a plain file source: we need to do some bookkeeping around the reset functionality
|
71
|
-
idx_t result_offset = 0;
|
72
|
-
if (read_position < buffer_size) {
|
73
|
-
// we need to read from our cached buffer
|
74
|
-
auto buffer_read_count = MinValue<idx_t>(nr_bytes, buffer_size - read_position);
|
75
|
-
memcpy(buffer, cached_buffer.get() + read_position, buffer_read_count);
|
76
|
-
result_offset += buffer_read_count;
|
77
|
-
read_position += buffer_read_count;
|
78
|
-
if (result_offset == nr_bytes) {
|
79
|
-
return nr_bytes;
|
80
|
-
}
|
81
|
-
} else if (!reset_enabled && cached_buffer) {
|
82
|
-
// reset is disabled, but we still have cached data
|
83
|
-
// we can remove any cached data
|
84
|
-
cached_buffer.reset();
|
85
|
-
buffer_size = 0;
|
86
|
-
buffer_capacity = 0;
|
87
|
-
read_position = 0;
|
88
|
-
}
|
89
|
-
// we have data left to read from the file
|
90
|
-
// read directly into the buffer
|
91
|
-
auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
|
92
|
-
file_size = file_handle->GetFileSize();
|
93
|
-
read_position += bytes_read;
|
94
|
-
if (reset_enabled) {
|
95
|
-
// if reset caching is enabled, we need to cache the bytes that we have read
|
96
|
-
if (buffer_size + bytes_read >= buffer_capacity) {
|
97
|
-
// no space; first enlarge the buffer
|
98
|
-
buffer_capacity = MaxValue<idx_t>(NextPowerOfTwo(buffer_size + bytes_read), buffer_capacity * 2);
|
34
|
+
idx_t FileSize();
|
99
35
|
|
100
|
-
|
101
|
-
if (buffer_size > 0) {
|
102
|
-
memcpy(new_buffer.get(), cached_buffer.get(), buffer_size);
|
103
|
-
}
|
104
|
-
cached_buffer = std::move(new_buffer);
|
105
|
-
}
|
106
|
-
memcpy(cached_buffer.get() + buffer_size, (char *)buffer + result_offset, bytes_read);
|
107
|
-
buffer_size += bytes_read;
|
108
|
-
}
|
36
|
+
bool FinishedReading();
|
109
37
|
|
110
|
-
|
111
|
-
} else {
|
112
|
-
return file_handle->Read(buffer, nr_bytes);
|
113
|
-
}
|
114
|
-
}
|
38
|
+
idx_t Read(void *buffer, idx_t nr_bytes);
|
115
39
|
|
116
|
-
string ReadLine()
|
117
|
-
|
118
|
-
string result;
|
119
|
-
char buffer[1];
|
120
|
-
while (true) {
|
121
|
-
idx_t bytes_read = Read(buffer, 1);
|
122
|
-
if (bytes_read == 0) {
|
123
|
-
return result;
|
124
|
-
}
|
125
|
-
if (carriage_return) {
|
126
|
-
if (buffer[0] != '\n') {
|
127
|
-
if (!file_handle->CanSeek()) {
|
128
|
-
throw BinderException(
|
129
|
-
"Carriage return newlines not supported when reading CSV files in which we cannot seek");
|
130
|
-
}
|
131
|
-
file_handle->Seek(file_handle->SeekPosition() - 1);
|
132
|
-
return result;
|
133
|
-
}
|
134
|
-
}
|
135
|
-
if (buffer[0] == '\n') {
|
136
|
-
return result;
|
137
|
-
}
|
138
|
-
if (buffer[0] != '\r') {
|
139
|
-
result += buffer[0];
|
140
|
-
} else {
|
141
|
-
carriage_return = true;
|
142
|
-
}
|
143
|
-
}
|
144
|
-
}
|
40
|
+
string ReadLine();
|
41
|
+
void DisableReset();
|
145
42
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
idx_t count = 0;
|
43
|
+
static unique_ptr<FileHandle> OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
|
44
|
+
FileCompressionType compression);
|
45
|
+
static unique_ptr<CSVFileHandle> OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
|
46
|
+
FileCompressionType compression, bool enable_reset);
|
151
47
|
|
152
48
|
private:
|
49
|
+
FileSystem &fs;
|
50
|
+
Allocator &allocator;
|
153
51
|
unique_ptr<FileHandle> file_handle;
|
52
|
+
string path;
|
53
|
+
FileCompressionType compression;
|
154
54
|
bool reset_enabled = true;
|
155
55
|
bool can_seek = false;
|
156
|
-
bool
|
56
|
+
bool on_disk_file = false;
|
157
57
|
idx_t file_size = 0;
|
158
58
|
// reset support
|
159
|
-
|
59
|
+
AllocatedData cached_buffer;
|
160
60
|
idx_t read_position = 0;
|
161
61
|
idx_t buffer_size = 0;
|
162
62
|
idx_t buffer_capacity = 0;
|
@@ -67,7 +67,7 @@ struct CSVBufferRead {
|
|
67
67
|
} else {
|
68
68
|
// 3) It starts in the current buffer and ends in the next buffer
|
69
69
|
D_ASSERT(next_buffer);
|
70
|
-
auto intersection =
|
70
|
+
auto intersection = make_unsafe_array<char>(length);
|
71
71
|
idx_t cur_pos = 0;
|
72
72
|
auto buffer_ptr = buffer->Ptr();
|
73
73
|
for (idx_t i = start_buffer; i < buffer->GetBufferSize(); i++) {
|
@@ -85,7 +85,7 @@ struct CSVBufferRead {
|
|
85
85
|
|
86
86
|
shared_ptr<CSVBuffer> buffer;
|
87
87
|
shared_ptr<CSVBuffer> next_buffer;
|
88
|
-
vector<
|
88
|
+
vector<unsafe_array_ptr<char>> intersections;
|
89
89
|
optional_ptr<LineInfo> line_info;
|
90
90
|
|
91
91
|
idx_t buffer_start;
|
@@ -46,9 +46,9 @@ protected:
|
|
46
46
|
// The actual pointer to the data
|
47
47
|
data_ptr_t data;
|
48
48
|
//! The owned data of the HT
|
49
|
-
|
49
|
+
unsafe_array_ptr<data_t> owned_data;
|
50
50
|
//! Information on whether or not a specific group has any entries
|
51
|
-
|
51
|
+
unsafe_array_ptr<bool> group_is_set;
|
52
52
|
|
53
53
|
//! The minimum values for each of the group columns
|
54
54
|
vector<Value> group_minima;
|
@@ -113,7 +113,7 @@ private:
|
|
113
113
|
Vector statev;
|
114
114
|
|
115
115
|
//! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes
|
116
|
-
|
116
|
+
unsafe_array_ptr<data_t> levels_flat_native;
|
117
117
|
//! For each level, the starting location in the levels_flat_native array
|
118
118
|
vector<idx_t> levels_flat_start;
|
119
119
|
|
@@ -55,7 +55,7 @@ struct WriteCSVData : public BaseCSVData {
|
|
55
55
|
//! The size of the CSV file (in bytes) that we buffer before we flush it to disk
|
56
56
|
idx_t flush_size = 4096 * 8;
|
57
57
|
//! For each byte whether or not the CSV file requires quotes when containing the byte
|
58
|
-
|
58
|
+
unsafe_array_ptr<bool> requires_quotes;
|
59
59
|
};
|
60
60
|
|
61
61
|
struct ColumnInfo {
|
@@ -65,7 +65,7 @@ struct ColumnInfo {
|
|
65
65
|
names = std::move(names_p);
|
66
66
|
types = std::move(types_p);
|
67
67
|
}
|
68
|
-
void Serialize(FieldWriter &writer) {
|
68
|
+
void Serialize(FieldWriter &writer) const {
|
69
69
|
writer.WriteList<string>(names);
|
70
70
|
writer.WriteRegularSerializableList<LogicalType>(types);
|
71
71
|
}
|
@@ -99,8 +99,6 @@ struct ReadCSVData : public BaseCSVData {
|
|
99
99
|
bool single_threaded = false;
|
100
100
|
//! Reader bind data
|
101
101
|
MultiFileReaderBindData reader_bind;
|
102
|
-
//! If any file is a pipe
|
103
|
-
bool is_pipe = false;
|
104
102
|
vector<ColumnInfo> column_info;
|
105
103
|
|
106
104
|
void Initialize(unique_ptr<BufferedCSVReader> &reader) {
|
@@ -27,12 +27,12 @@ struct SingleJoinRelation {
|
|
27
27
|
|
28
28
|
//! Set of relations, used in the join graph.
|
29
29
|
struct JoinRelationSet {
|
30
|
-
JoinRelationSet(
|
30
|
+
JoinRelationSet(unsafe_array_ptr<idx_t> relations, idx_t count) : relations(std::move(relations)), count(count) {
|
31
31
|
}
|
32
32
|
|
33
33
|
string ToString() const;
|
34
34
|
|
35
|
-
|
35
|
+
unsafe_array_ptr<idx_t> relations;
|
36
36
|
idx_t count;
|
37
37
|
|
38
38
|
static bool IsSubset(JoinRelationSet &super, JoinRelationSet &sub);
|
@@ -55,7 +55,7 @@ public:
|
|
55
55
|
//! Create or get a JoinRelationSet from a set of relation bindings
|
56
56
|
JoinRelationSet &GetJoinRelation(unordered_set<idx_t> &bindings);
|
57
57
|
//! Create or get a JoinRelationSet from a (sorted, duplicate-free!) list of relations
|
58
|
-
JoinRelationSet &GetJoinRelation(
|
58
|
+
JoinRelationSet &GetJoinRelation(unsafe_array_ptr<idx_t> relations, idx_t count);
|
59
59
|
//! Union two sets of relations together and create a new relation set
|
60
60
|
JoinRelationSet &Union(JoinRelationSet &left, JoinRelationSet &right);
|
61
61
|
// //! Create the set difference of left \ right (i.e. all elements in left that are not in right)
|
@@ -24,6 +24,8 @@ enum class OnConflictAction : uint8_t {
|
|
24
24
|
REPLACE // Only used in transform/bind step, changed to UPDATE later
|
25
25
|
};
|
26
26
|
|
27
|
+
enum class InsertColumnOrder : uint8_t { INSERT_BY_POSITION = 0, INSERT_BY_NAME = 1 };
|
28
|
+
|
27
29
|
class OnConflictInfo {
|
28
30
|
public:
|
29
31
|
OnConflictInfo();
|
@@ -75,6 +77,9 @@ public:
|
|
75
77
|
//! Whether or not this a DEFAULT VALUES
|
76
78
|
bool default_values = false;
|
77
79
|
|
80
|
+
//! INSERT BY POSITION or INSERT BY NAME
|
81
|
+
InsertColumnOrder column_order = InsertColumnOrder::INSERT_BY_POSITION;
|
82
|
+
|
78
83
|
protected:
|
79
84
|
InsertStatement(const InsertStatement &other);
|
80
85
|
|
@@ -20,7 +20,7 @@ struct ArenaChunk {
|
|
20
20
|
AllocatedData data;
|
21
21
|
idx_t current_position;
|
22
22
|
idx_t maximum_size;
|
23
|
-
|
23
|
+
unsafe_unique_ptr<ArenaChunk> next;
|
24
24
|
ArenaChunk *prev;
|
25
25
|
};
|
26
26
|
|
@@ -56,7 +56,7 @@ private:
|
|
56
56
|
//! Internal allocator that is used by the arena allocator
|
57
57
|
Allocator &allocator;
|
58
58
|
idx_t current_capacity;
|
59
|
-
|
59
|
+
unsafe_unique_ptr<ArenaChunk> head;
|
60
60
|
ArenaChunk *tail;
|
61
61
|
//! An allocator wrapper using this arena allocator
|
62
62
|
Allocator arena_allocator;
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/storage/storage_info.hpp"
|
12
|
+
#include "duckdb/common/file_buffer.hpp"
|
12
13
|
|
13
14
|
namespace duckdb {
|
14
15
|
class BlockHandle;
|
@@ -30,9 +31,15 @@ public:
|
|
30
31
|
//! Returns whether or not the BufferHandle is valid.
|
31
32
|
DUCKDB_API bool IsValid() const;
|
32
33
|
//! Returns a pointer to the buffer data. Handle must be valid.
|
33
|
-
|
34
|
+
inline data_ptr_t Ptr() const {
|
35
|
+
D_ASSERT(IsValid());
|
36
|
+
return node->buffer;
|
37
|
+
}
|
34
38
|
//! Returns a pointer to the buffer data. Handle must be valid.
|
35
|
-
|
39
|
+
inline data_ptr_t Ptr() {
|
40
|
+
D_ASSERT(IsValid());
|
41
|
+
return node->buffer;
|
42
|
+
}
|
36
43
|
//! Gets the underlying file buffer. Handle must be valid.
|
37
44
|
DUCKDB_API FileBuffer &GetFileBuffer();
|
38
45
|
//! Destroys the buffer handle
|
@@ -44,7 +44,7 @@ struct RowGroupAppendState {
|
|
44
44
|
//! The current row_group we are appending to
|
45
45
|
RowGroup *row_group;
|
46
46
|
//! The column append states
|
47
|
-
|
47
|
+
unsafe_array_ptr<ColumnAppendState> states;
|
48
48
|
//! Offset within the row_group
|
49
49
|
idx_t offset_in_row_group;
|
50
50
|
};
|
@@ -99,7 +99,7 @@ public:
|
|
99
99
|
//! The maximum row within the row group
|
100
100
|
idx_t max_row_group_row;
|
101
101
|
//! Child column scans
|
102
|
-
|
102
|
+
unsafe_array_ptr<ColumnScanState> column_scans;
|
103
103
|
//! Row group segment tree
|
104
104
|
RowGroupSegmentTree *row_groups;
|
105
105
|
//! The total maximum row index
|
@@ -35,7 +35,7 @@ bool JoinRelationSet::IsSubset(JoinRelationSet &super, JoinRelationSet &sub) {
|
|
35
35
|
return false;
|
36
36
|
}
|
37
37
|
|
38
|
-
JoinRelationSet &JoinRelationSetManager::GetJoinRelation(
|
38
|
+
JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unsafe_array_ptr<idx_t> relations, idx_t count) {
|
39
39
|
// now look it up in the tree
|
40
40
|
reference<JoinRelationTreeNode> info(root);
|
41
41
|
for (idx_t i = 0; i < count; i++) {
|
@@ -59,7 +59,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unique_ptr<idx_t[]> rel
|
|
59
59
|
//! Create or get a JoinRelationSet from a single node with the given index
|
60
60
|
JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
|
61
61
|
// create a sorted vector of the relations
|
62
|
-
auto relations =
|
62
|
+
auto relations = make_unsafe_array<idx_t>(1);
|
63
63
|
relations[0] = index;
|
64
64
|
idx_t count = 1;
|
65
65
|
return GetJoinRelation(std::move(relations), count);
|
@@ -67,7 +67,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
|
|
67
67
|
|
68
68
|
JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &bindings) {
|
69
69
|
// create a sorted vector of the relations
|
70
|
-
|
70
|
+
unsafe_array_ptr<idx_t> relations = bindings.empty() ? nullptr : make_unsafe_array<idx_t>(bindings.size());
|
71
71
|
idx_t count = 0;
|
72
72
|
for (auto &entry : bindings) {
|
73
73
|
relations[count++] = entry;
|
@@ -77,7 +77,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &b
|
|
77
77
|
}
|
78
78
|
|
79
79
|
JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelationSet &right) {
|
80
|
-
auto relations =
|
80
|
+
auto relations = make_unsafe_array<idx_t>(left.count + right.count);
|
81
81
|
idx_t count = 0;
|
82
82
|
// move through the left and right relations, eliminating duplicates
|
83
83
|
idx_t i = 0, j = 0;
|
@@ -113,7 +113,7 @@ JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelati
|
|
113
113
|
}
|
114
114
|
|
115
115
|
// JoinRelationSet *JoinRelationSetManager::Difference(JoinRelationSet *left, JoinRelationSet *right) {
|
116
|
-
// auto relations =
|
116
|
+
// auto relations = unsafe_array_ptr<idx_t>(new idx_t[left->count]);
|
117
117
|
// idx_t count = 0;
|
118
118
|
// // move through the left and right relations
|
119
119
|
// idx_t i = 0, j = 0;
|
@@ -195,18 +195,24 @@ void UnnestRewriter::UpdateRHSBindings(unique_ptr<LogicalOperator> *plan_ptr, un
|
|
195
195
|
updater.replace_bindings.push_back(replace_binding);
|
196
196
|
}
|
197
197
|
|
198
|
-
// temporarily remove the
|
198
|
+
// temporarily remove the BOUND_UNNESTs and the child of the LOGICAL_UNNEST from the plan
|
199
199
|
D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
|
200
200
|
auto &unnest = curr_op->get()->Cast<LogicalUnnest>();
|
201
|
-
|
201
|
+
vector<unique_ptr<Expression>> temp_bound_unnests;
|
202
|
+
for (auto &temp_bound_unnest : unnest.expressions) {
|
203
|
+
temp_bound_unnests.push_back(std::move(temp_bound_unnest));
|
204
|
+
}
|
205
|
+
D_ASSERT(unnest.children.size() == 1);
|
202
206
|
auto temp_unnest_child = std::move(unnest.children[0]);
|
203
207
|
unnest.expressions.clear();
|
204
208
|
unnest.children.clear();
|
205
209
|
// update the bindings of the plan
|
206
210
|
updater.VisitOperator(*plan_ptr->get());
|
207
211
|
updater.replace_bindings.clear();
|
208
|
-
// add the
|
209
|
-
|
212
|
+
// add the children again
|
213
|
+
for (auto &temp_bound_unnest : temp_bound_unnests) {
|
214
|
+
unnest.expressions.push_back(std::move(temp_bound_unnest));
|
215
|
+
}
|
210
216
|
unnest.children.push_back(std::move(temp_unnest_child));
|
211
217
|
|
212
218
|
// add the LHS expressions to each LOGICAL_PROJECTION
|
@@ -256,6 +262,7 @@ void UnnestRewriter::UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &update
|
|
256
262
|
D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
|
257
263
|
auto &unnest = curr_op->get()->Cast<LogicalUnnest>();
|
258
264
|
|
265
|
+
D_ASSERT(unnest.children.size() == 1);
|
259
266
|
auto unnest_child_cols = unnest.children[0]->GetColumnBindings();
|
260
267
|
for (idx_t delim_col_idx = 0; delim_col_idx < delim_columns.size(); delim_col_idx++) {
|
261
268
|
for (idx_t child_col_idx = 0; child_col_idx < unnest_child_cols.size(); child_col_idx++) {
|
@@ -268,8 +275,9 @@ void UnnestRewriter::UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &update
|
|
268
275
|
}
|
269
276
|
|
270
277
|
// update bindings
|
271
|
-
|
272
|
-
|
278
|
+
for (auto &unnest_expr : unnest.expressions) {
|
279
|
+
updater.VisitExpression(&unnest_expr);
|
280
|
+
}
|
273
281
|
updater.replace_bindings.clear();
|
274
282
|
}
|
275
283
|
|
@@ -30,7 +30,7 @@ InsertStatement::InsertStatement(const InsertStatement &other)
|
|
30
30
|
: SQLStatement(other), select_statement(unique_ptr_cast<SQLStatement, SelectStatement>(
|
31
31
|
other.select_statement ? other.select_statement->Copy() : nullptr)),
|
32
32
|
columns(other.columns), table(other.table), schema(other.schema), catalog(other.catalog),
|
33
|
-
default_values(other.default_values) {
|
33
|
+
default_values(other.default_values), column_order(other.column_order) {
|
34
34
|
cte_map = other.cte_map.Copy();
|
35
35
|
for (auto &expr : other.returning_list) {
|
36
36
|
returning_list.emplace_back(expr->Copy());
|
@@ -81,6 +81,9 @@ string InsertStatement::ToString() const {
|
|
81
81
|
if (table_ref && !table_ref->alias.empty()) {
|
82
82
|
result += StringUtil::Format(" AS %s", KeywordHelper::WriteOptionallyQuoted(table_ref->alias));
|
83
83
|
}
|
84
|
+
if (column_order == InsertColumnOrder::INSERT_BY_NAME) {
|
85
|
+
result += " BY NAME";
|
86
|
+
}
|
84
87
|
if (!columns.empty()) {
|
85
88
|
result += " (";
|
86
89
|
for (idx_t i = 0; i < columns.size(); i++) {
|
@@ -67,6 +67,16 @@ unique_ptr<InsertStatement> Transformer::TransformInsert(duckdb_libpgquery::PGNo
|
|
67
67
|
result->on_conflict_info = DummyOnConflictClause(stmt->onConflictAlias, result->schema);
|
68
68
|
result->table_ref = TransformRangeVar(stmt->relation);
|
69
69
|
}
|
70
|
+
switch (stmt->insert_column_order) {
|
71
|
+
case duckdb_libpgquery::PG_INSERT_BY_POSITION:
|
72
|
+
result->column_order = InsertColumnOrder::INSERT_BY_POSITION;
|
73
|
+
break;
|
74
|
+
case duckdb_libpgquery::PG_INSERT_BY_NAME:
|
75
|
+
result->column_order = InsertColumnOrder::INSERT_BY_NAME;
|
76
|
+
break;
|
77
|
+
default:
|
78
|
+
throw InternalException("Unrecognized insert column order in TransformInsert");
|
79
|
+
}
|
70
80
|
result->catalog = qname.catalog;
|
71
81
|
return result;
|
72
82
|
}
|
@@ -406,6 +406,26 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
|
|
406
406
|
// Add CTEs as bindable
|
407
407
|
AddCTEMap(stmt.cte_map);
|
408
408
|
|
409
|
+
auto values_list = stmt.GetValuesList();
|
410
|
+
|
411
|
+
// bind the root select node (if any)
|
412
|
+
BoundStatement root_select;
|
413
|
+
if (stmt.column_order == InsertColumnOrder::INSERT_BY_NAME) {
|
414
|
+
if (values_list) {
|
415
|
+
throw BinderException("INSERT BY NAME can only be used when inserting from a SELECT statement");
|
416
|
+
}
|
417
|
+
if (!stmt.columns.empty()) {
|
418
|
+
throw BinderException("INSERT BY NAME cannot be combined with an explicit column list");
|
419
|
+
}
|
420
|
+
D_ASSERT(stmt.select_statement);
|
421
|
+
// INSERT BY NAME - generate the columns from the names of the SELECT statement
|
422
|
+
auto select_binder = Binder::CreateBinder(context, this);
|
423
|
+
root_select = select_binder->Bind(*stmt.select_statement);
|
424
|
+
MoveCorrelatedExpressions(*select_binder);
|
425
|
+
|
426
|
+
stmt.columns = root_select.names;
|
427
|
+
}
|
428
|
+
|
409
429
|
vector<LogicalIndex> named_column_map;
|
410
430
|
if (!stmt.columns.empty() || stmt.default_values) {
|
411
431
|
// insertion statement specifies column list
|
@@ -413,6 +433,10 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
|
|
413
433
|
// create a mapping of (list index) -> (column index)
|
414
434
|
case_insensitive_map_t<idx_t> column_name_map;
|
415
435
|
for (idx_t i = 0; i < stmt.columns.size(); i++) {
|
436
|
+
auto entry = column_name_map.insert(make_pair(stmt.columns[i], i));
|
437
|
+
if (!entry.second) {
|
438
|
+
throw BinderException("Duplicate column name \"%s\" in INSERT", stmt.columns[i]);
|
439
|
+
}
|
416
440
|
column_name_map[stmt.columns[i]] = i;
|
417
441
|
auto column_index = table.GetColumnIndex(stmt.columns[i]);
|
418
442
|
if (column_index.index == COLUMN_IDENTIFIER_ROW_ID) {
|
@@ -436,8 +460,8 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
|
|
436
460
|
}
|
437
461
|
}
|
438
462
|
} else {
|
439
|
-
//
|
440
|
-
//
|
463
|
+
// insert by position and no columns specified - insertion into all columns of the table
|
464
|
+
// intentionally don't populate 'column_index_map' as an indication of this
|
441
465
|
for (auto &col : table.GetColumns().Physical()) {
|
442
466
|
named_column_map.push_back(col.Logical());
|
443
467
|
insert->expected_types.push_back(col.Type());
|
@@ -454,7 +478,6 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
|
|
454
478
|
idx_t expected_columns = stmt.columns.empty() ? table.GetColumns().PhysicalColumnCount() : stmt.columns.size();
|
455
479
|
|
456
480
|
// special case: check if we are inserting from a VALUES statement
|
457
|
-
auto values_list = stmt.GetValuesList();
|
458
481
|
if (values_list) {
|
459
482
|
auto &expr_list = values_list->Cast<ExpressionListRef>();
|
460
483
|
expr_list.expected_types.resize(expected_columns);
|
@@ -487,10 +510,12 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
|
|
487
510
|
// parse select statement and add to logical plan
|
488
511
|
unique_ptr<LogicalOperator> root;
|
489
512
|
if (stmt.select_statement) {
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
513
|
+
if (stmt.column_order == InsertColumnOrder::INSERT_BY_POSITION) {
|
514
|
+
auto select_binder = Binder::CreateBinder(context, this);
|
515
|
+
root_select = select_binder->Bind(*stmt.select_statement);
|
516
|
+
MoveCorrelatedExpressions(*select_binder);
|
517
|
+
}
|
518
|
+
// inserting from a select - check if the column count matches
|
494
519
|
CheckInsertColumnCountMismatch(expected_columns, root_select.types.size(), !stmt.columns.empty(),
|
495
520
|
table.name.c_str());
|
496
521
|
|
@@ -64,7 +64,7 @@ data_ptr_t ArenaAllocator::Allocate(idx_t len) {
|
|
64
64
|
do {
|
65
65
|
current_capacity *= 2;
|
66
66
|
} while (current_capacity < len);
|
67
|
-
auto new_chunk =
|
67
|
+
auto new_chunk = make_unsafe_uniq<ArenaChunk>(allocator, current_capacity);
|
68
68
|
if (head) {
|
69
69
|
head->prev = new_chunk.get();
|
70
70
|
new_chunk->next = std::move(head);
|
@@ -7,7 +7,8 @@ namespace duckdb {
|
|
7
7
|
BufferHandle::BufferHandle() : handle(nullptr), node(nullptr) {
|
8
8
|
}
|
9
9
|
|
10
|
-
BufferHandle::BufferHandle(shared_ptr<BlockHandle>
|
10
|
+
BufferHandle::BufferHandle(shared_ptr<BlockHandle> handle_p, FileBuffer *node_p)
|
11
|
+
: handle(std::move(handle_p)), node(node_p) {
|
11
12
|
}
|
12
13
|
|
13
14
|
BufferHandle::BufferHandle(BufferHandle &&other) noexcept {
|
@@ -29,16 +30,6 @@ bool BufferHandle::IsValid() const {
|
|
29
30
|
return node != nullptr;
|
30
31
|
}
|
31
32
|
|
32
|
-
data_ptr_t BufferHandle::Ptr() const {
|
33
|
-
D_ASSERT(IsValid());
|
34
|
-
return node->buffer;
|
35
|
-
}
|
36
|
-
|
37
|
-
data_ptr_t BufferHandle::Ptr() {
|
38
|
-
D_ASSERT(IsValid());
|
39
|
-
return node->buffer;
|
40
|
-
}
|
41
|
-
|
42
33
|
void BufferHandle::Destroy() {
|
43
34
|
if (!handle || !IsValid()) {
|
44
35
|
return;
|
@@ -32,7 +32,7 @@ void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result
|
|
32
32
|
MiniZStream s;
|
33
33
|
size_t compressed_size = 0;
|
34
34
|
compressed_size = s.MaxCompressedLength(uncompressed_size);
|
35
|
-
auto compressed_buf =
|
35
|
+
auto compressed_buf = make_unsafe_array<data_t>(compressed_size);
|
36
36
|
s.Compress((const char *)string.GetData(), uncompressed_size, (char *)compressed_buf.get(), &compressed_size);
|
37
37
|
string_t compressed_string((const char *)compressed_buf.get(), compressed_size);
|
38
38
|
|
@@ -292,13 +292,13 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
|
|
292
292
|
offset += 2 * sizeof(uint32_t);
|
293
293
|
|
294
294
|
data_ptr_t decompression_ptr;
|
295
|
-
|
295
|
+
unsafe_array_ptr<data_t> decompression_buffer;
|
296
296
|
|
297
297
|
// If string is in single block we decompress straight from it, else we copy first
|
298
298
|
if (remaining <= Storage::BLOCK_SIZE - sizeof(block_id_t) - offset) {
|
299
299
|
decompression_ptr = handle.Ptr() + offset;
|
300
300
|
} else {
|
301
|
-
decompression_buffer =
|
301
|
+
decompression_buffer = make_unsafe_array<data_t>(compressed_size);
|
302
302
|
auto target_ptr = decompression_buffer.get();
|
303
303
|
|
304
304
|
// now append the string to the single buffer
|
@@ -7,7 +7,7 @@
|
|
7
7
|
namespace duckdb {
|
8
8
|
|
9
9
|
void ListStats::Construct(BaseStatistics &stats) {
|
10
|
-
stats.child_stats =
|
10
|
+
stats.child_stats = unsafe_array_ptr<BaseStatistics>(new BaseStatistics[1]);
|
11
11
|
BaseStatistics::Construct(stats.child_stats[0], ListType::GetChildType(stats.GetType()));
|
12
12
|
}
|
13
13
|
|