duckdb 0.7.2-dev3441.0 → 0.7.2-dev3515.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (106):
  1. package/package.json +2 -2
  2. package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
  3. package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
  4. package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
  5. package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
  6. package/src/duckdb/src/common/file_system.cpp +2 -2
  7. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  8. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  9. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
  10. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  11. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +4 -3
  12. package/src/duckdb/src/common/serializer.cpp +1 -1
  13. package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
  14. package/src/duckdb/src/common/string_util.cpp +2 -2
  15. package/src/duckdb/src/common/types/bit.cpp +2 -2
  16. package/src/duckdb/src/common/types/blob.cpp +2 -2
  17. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  18. package/src/duckdb/src/common/types/date.cpp +1 -1
  19. package/src/duckdb/src/common/types/decimal.cpp +2 -2
  20. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +14 -2
  21. package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
  22. package/src/duckdb/src/common/types/time.cpp +1 -1
  23. package/src/duckdb/src/common/types/vector.cpp +7 -7
  24. package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
  25. package/src/duckdb/src/common/windows_util.cpp +2 -2
  26. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +6 -3
  27. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -5
  28. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
  29. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  30. package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
  31. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
  32. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  33. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  34. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  35. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -7
  36. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +4 -41
  37. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
  38. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
  39. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
  40. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -4
  41. package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
  42. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
  43. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -1
  44. package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
  45. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  46. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  47. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  48. package/src/duckdb/src/function/table/read_csv.cpp +43 -35
  49. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  50. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  51. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -3
  52. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -5
  53. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -9
  54. package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +15 -0
  55. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +1 -0
  56. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  58. package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +3 -2
  59. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -3
  60. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +11 -6
  61. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -1
  62. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
  64. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +4 -4
  65. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +53 -22
  66. package/src/duckdb/src/include/duckdb/common/vector.hpp +5 -2
  67. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +4 -4
  69. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
  70. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +0 -2
  73. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
  74. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +27 -127
  75. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
  76. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
  77. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  78. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +2 -4
  79. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
  80. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +5 -0
  81. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -2
  82. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +9 -2
  83. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
  85. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +1 -1
  86. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
  87. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
  88. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +14 -6
  89. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -1
  90. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +10 -0
  91. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -7
  92. package/src/duckdb/src/storage/arena_allocator.cpp +1 -1
  93. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -11
  94. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
  95. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
  96. package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
  97. package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
  98. package/src/duckdb/src/storage/table/row_group.cpp +2 -2
  99. package/src/duckdb/src/storage/table/update_segment.cpp +7 -6
  100. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -2
  101. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +9 -0
  102. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +13 -12
  103. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  104. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12537 -12415
  105. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  106. package/src/statement.cpp +15 -13
@@ -11,152 +11,52 @@
11
11
  #include "duckdb/common/file_system.hpp"
12
12
  #include "duckdb/common/mutex.hpp"
13
13
  #include "duckdb/common/helper.hpp"
14
+ #include "duckdb/common/allocator.hpp"
14
15
 
15
16
  namespace duckdb {
17
+ class Allocator;
18
+ class FileSystem;
16
19
 
17
20
  struct CSVFileHandle {
18
21
  public:
19
- explicit CSVFileHandle(unique_ptr<FileHandle> file_handle_p, bool enable_reset = true)
20
- : file_handle(std::move(file_handle_p)), reset_enabled(enable_reset) {
21
- can_seek = file_handle->CanSeek();
22
- plain_file_source = file_handle->OnDiskFile() && can_seek;
23
- file_size = file_handle->GetFileSize();
24
- }
22
+ CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<FileHandle> file_handle_p, const string &path_p,
23
+ FileCompressionType compression, bool enable_reset = true);
25
24
 
26
- bool CanSeek() {
27
- return can_seek;
28
- }
29
- void Seek(idx_t position) {
30
- if (!can_seek) {
31
- throw InternalException("Cannot seek in this file");
32
- }
33
- file_handle->Seek(position);
34
- }
35
- idx_t SeekPosition() {
36
- if (!can_seek) {
37
- throw InternalException("Cannot seek in this file");
38
- }
39
- return file_handle->SeekPosition();
40
- }
41
- void Reset() {
42
- if (plain_file_source) {
43
- file_handle->Reset();
44
- } else {
45
- if (!reset_enabled) {
46
- throw InternalException("Reset called but reset is not enabled for this CSV Handle");
47
- }
48
- read_position = 0;
49
- }
50
- }
51
- bool PlainFileSource() {
52
- return plain_file_source;
53
- }
54
-
55
- bool OnDiskFile() {
56
- return file_handle->OnDiskFile();
57
- }
58
-
59
- idx_t FileSize() {
60
- return file_size;
61
- }
25
+ mutex main_mutex;
62
26
 
63
- bool FinishedReading() {
64
- return requested_bytes >= file_size;
65
- }
27
+ public:
28
+ bool CanSeek();
29
+ void Seek(idx_t position);
30
+ idx_t SeekPosition();
31
+ void Reset();
32
+ bool OnDiskFile();
66
33
 
67
- idx_t Read(void *buffer, idx_t nr_bytes) {
68
- requested_bytes += nr_bytes;
69
- if (!plain_file_source) {
70
- // not a plain file source: we need to do some bookkeeping around the reset functionality
71
- idx_t result_offset = 0;
72
- if (read_position < buffer_size) {
73
- // we need to read from our cached buffer
74
- auto buffer_read_count = MinValue<idx_t>(nr_bytes, buffer_size - read_position);
75
- memcpy(buffer, cached_buffer.get() + read_position, buffer_read_count);
76
- result_offset += buffer_read_count;
77
- read_position += buffer_read_count;
78
- if (result_offset == nr_bytes) {
79
- return nr_bytes;
80
- }
81
- } else if (!reset_enabled && cached_buffer) {
82
- // reset is disabled, but we still have cached data
83
- // we can remove any cached data
84
- cached_buffer.reset();
85
- buffer_size = 0;
86
- buffer_capacity = 0;
87
- read_position = 0;
88
- }
89
- // we have data left to read from the file
90
- // read directly into the buffer
91
- auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
92
- file_size = file_handle->GetFileSize();
93
- read_position += bytes_read;
94
- if (reset_enabled) {
95
- // if reset caching is enabled, we need to cache the bytes that we have read
96
- if (buffer_size + bytes_read >= buffer_capacity) {
97
- // no space; first enlarge the buffer
98
- buffer_capacity = MaxValue<idx_t>(NextPowerOfTwo(buffer_size + bytes_read), buffer_capacity * 2);
34
+ idx_t FileSize();
99
35
 
100
- auto new_buffer = unique_ptr<data_t[]>(new data_t[buffer_capacity]);
101
- if (buffer_size > 0) {
102
- memcpy(new_buffer.get(), cached_buffer.get(), buffer_size);
103
- }
104
- cached_buffer = std::move(new_buffer);
105
- }
106
- memcpy(cached_buffer.get() + buffer_size, (char *)buffer + result_offset, bytes_read);
107
- buffer_size += bytes_read;
108
- }
36
+ bool FinishedReading();
109
37
 
110
- return result_offset + bytes_read;
111
- } else {
112
- return file_handle->Read(buffer, nr_bytes);
113
- }
114
- }
38
+ idx_t Read(void *buffer, idx_t nr_bytes);
115
39
 
116
- string ReadLine() {
117
- bool carriage_return = false;
118
- string result;
119
- char buffer[1];
120
- while (true) {
121
- idx_t bytes_read = Read(buffer, 1);
122
- if (bytes_read == 0) {
123
- return result;
124
- }
125
- if (carriage_return) {
126
- if (buffer[0] != '\n') {
127
- if (!file_handle->CanSeek()) {
128
- throw BinderException(
129
- "Carriage return newlines not supported when reading CSV files in which we cannot seek");
130
- }
131
- file_handle->Seek(file_handle->SeekPosition() - 1);
132
- return result;
133
- }
134
- }
135
- if (buffer[0] == '\n') {
136
- return result;
137
- }
138
- if (buffer[0] != '\r') {
139
- result += buffer[0];
140
- } else {
141
- carriage_return = true;
142
- }
143
- }
144
- }
40
+ string ReadLine();
41
+ void DisableReset();
145
42
 
146
- void DisableReset() {
147
- this->reset_enabled = false;
148
- }
149
- mutex main_mutex;
150
- idx_t count = 0;
43
+ static unique_ptr<FileHandle> OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
44
+ FileCompressionType compression);
45
+ static unique_ptr<CSVFileHandle> OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
46
+ FileCompressionType compression, bool enable_reset);
151
47
 
152
48
  private:
49
+ FileSystem &fs;
50
+ Allocator &allocator;
153
51
  unique_ptr<FileHandle> file_handle;
52
+ string path;
53
+ FileCompressionType compression;
154
54
  bool reset_enabled = true;
155
55
  bool can_seek = false;
156
- bool plain_file_source = false;
56
+ bool on_disk_file = false;
157
57
  idx_t file_size = 0;
158
58
  // reset support
159
- unique_ptr<data_t[]> cached_buffer;
59
+ AllocatedData cached_buffer;
160
60
  idx_t read_position = 0;
161
61
  idx_t buffer_size = 0;
162
62
  idx_t buffer_capacity = 0;
@@ -67,7 +67,7 @@ struct CSVBufferRead {
67
67
  } else {
68
68
  // 3) It starts in the current buffer and ends in the next buffer
69
69
  D_ASSERT(next_buffer);
70
- auto intersection = unique_ptr<char[]>(new char[length]);
70
+ auto intersection = make_unsafe_array<char>(length);
71
71
  idx_t cur_pos = 0;
72
72
  auto buffer_ptr = buffer->Ptr();
73
73
  for (idx_t i = start_buffer; i < buffer->GetBufferSize(); i++) {
@@ -85,7 +85,7 @@ struct CSVBufferRead {
85
85
 
86
86
  shared_ptr<CSVBuffer> buffer;
87
87
  shared_ptr<CSVBuffer> next_buffer;
88
- vector<unique_ptr<char[]>> intersections;
88
+ vector<unsafe_array_ptr<char>> intersections;
89
89
  optional_ptr<LineInfo> line_info;
90
90
 
91
91
  idx_t buffer_start;
@@ -46,9 +46,9 @@ protected:
46
46
  // The actual pointer to the data
47
47
  data_ptr_t data;
48
48
  //! The owned data of the HT
49
- unique_ptr<data_t[]> owned_data;
49
+ unsafe_array_ptr<data_t> owned_data;
50
50
  //! Information on whether or not a specific group has any entries
51
- unique_ptr<bool[]> group_is_set;
51
+ unsafe_array_ptr<bool> group_is_set;
52
52
 
53
53
  //! The minimum values for each of the group columns
54
54
  vector<Value> group_minima;
@@ -113,7 +113,7 @@ private:
113
113
  Vector statev;
114
114
 
115
115
  //! The actual window segment tree: an array of aggregate states that represent all the intermediate nodes
116
- unique_ptr<data_t[]> levels_flat_native;
116
+ unsafe_array_ptr<data_t> levels_flat_native;
117
117
  //! For each level, the starting location in the levels_flat_native array
118
118
  vector<idx_t> levels_flat_start;
119
119
 
@@ -55,7 +55,7 @@ struct WriteCSVData : public BaseCSVData {
55
55
  //! The size of the CSV file (in bytes) that we buffer before we flush it to disk
56
56
  idx_t flush_size = 4096 * 8;
57
57
  //! For each byte whether or not the CSV file requires quotes when containing the byte
58
- unique_ptr<bool[]> requires_quotes;
58
+ unsafe_array_ptr<bool> requires_quotes;
59
59
  };
60
60
 
61
61
  struct ColumnInfo {
@@ -65,7 +65,7 @@ struct ColumnInfo {
65
65
  names = std::move(names_p);
66
66
  types = std::move(types_p);
67
67
  }
68
- void Serialize(FieldWriter &writer) {
68
+ void Serialize(FieldWriter &writer) const {
69
69
  writer.WriteList<string>(names);
70
70
  writer.WriteRegularSerializableList<LogicalType>(types);
71
71
  }
@@ -99,8 +99,6 @@ struct ReadCSVData : public BaseCSVData {
99
99
  bool single_threaded = false;
100
100
  //! Reader bind data
101
101
  MultiFileReaderBindData reader_bind;
102
- //! If any file is a pipe
103
- bool is_pipe = false;
104
102
  vector<ColumnInfo> column_info;
105
103
 
106
104
  void Initialize(unique_ptr<BufferedCSVReader> &reader) {
@@ -27,12 +27,12 @@ struct SingleJoinRelation {
27
27
 
28
28
  //! Set of relations, used in the join graph.
29
29
  struct JoinRelationSet {
30
- JoinRelationSet(unique_ptr<idx_t[]> relations, idx_t count) : relations(std::move(relations)), count(count) {
30
+ JoinRelationSet(unsafe_array_ptr<idx_t> relations, idx_t count) : relations(std::move(relations)), count(count) {
31
31
  }
32
32
 
33
33
  string ToString() const;
34
34
 
35
- unique_ptr<idx_t[]> relations;
35
+ unsafe_array_ptr<idx_t> relations;
36
36
  idx_t count;
37
37
 
38
38
  static bool IsSubset(JoinRelationSet &super, JoinRelationSet &sub);
@@ -55,7 +55,7 @@ public:
55
55
  //! Create or get a JoinRelationSet from a set of relation bindings
56
56
  JoinRelationSet &GetJoinRelation(unordered_set<idx_t> &bindings);
57
57
  //! Create or get a JoinRelationSet from a (sorted, duplicate-free!) list of relations
58
- JoinRelationSet &GetJoinRelation(unique_ptr<idx_t[]> relations, idx_t count);
58
+ JoinRelationSet &GetJoinRelation(unsafe_array_ptr<idx_t> relations, idx_t count);
59
59
  //! Union two sets of relations together and create a new relation set
60
60
  JoinRelationSet &Union(JoinRelationSet &left, JoinRelationSet &right);
61
61
  // //! Create the set difference of left \ right (i.e. all elements in left that are not in right)
@@ -24,6 +24,8 @@ enum class OnConflictAction : uint8_t {
24
24
  REPLACE // Only used in transform/bind step, changed to UPDATE later
25
25
  };
26
26
 
27
+ enum class InsertColumnOrder : uint8_t { INSERT_BY_POSITION = 0, INSERT_BY_NAME = 1 };
28
+
27
29
  class OnConflictInfo {
28
30
  public:
29
31
  OnConflictInfo();
@@ -75,6 +77,9 @@ public:
75
77
  //! Whether or not this a DEFAULT VALUES
76
78
  bool default_values = false;
77
79
 
80
+ //! INSERT BY POSITION or INSERT BY NAME
81
+ InsertColumnOrder column_order = InsertColumnOrder::INSERT_BY_POSITION;
82
+
78
83
  protected:
79
84
  InsertStatement(const InsertStatement &other);
80
85
 
@@ -20,7 +20,7 @@ struct ArenaChunk {
20
20
  AllocatedData data;
21
21
  idx_t current_position;
22
22
  idx_t maximum_size;
23
- unique_ptr<ArenaChunk> next;
23
+ unsafe_unique_ptr<ArenaChunk> next;
24
24
  ArenaChunk *prev;
25
25
  };
26
26
 
@@ -56,7 +56,7 @@ private:
56
56
  //! Internal allocator that is used by the arena allocator
57
57
  Allocator &allocator;
58
58
  idx_t current_capacity;
59
- unique_ptr<ArenaChunk> head;
59
+ unsafe_unique_ptr<ArenaChunk> head;
60
60
  ArenaChunk *tail;
61
61
  //! An allocator wrapper using this arena allocator
62
62
  Allocator arena_allocator;
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/storage/storage_info.hpp"
12
+ #include "duckdb/common/file_buffer.hpp"
12
13
 
13
14
  namespace duckdb {
14
15
  class BlockHandle;
@@ -30,9 +31,15 @@ public:
30
31
  //! Returns whether or not the BufferHandle is valid.
31
32
  DUCKDB_API bool IsValid() const;
32
33
  //! Returns a pointer to the buffer data. Handle must be valid.
33
- DUCKDB_API data_ptr_t Ptr() const;
34
+ inline data_ptr_t Ptr() const {
35
+ D_ASSERT(IsValid());
36
+ return node->buffer;
37
+ }
34
38
  //! Returns a pointer to the buffer data. Handle must be valid.
35
- DUCKDB_API data_ptr_t Ptr();
39
+ inline data_ptr_t Ptr() {
40
+ D_ASSERT(IsValid());
41
+ return node->buffer;
42
+ }
36
43
  //! Gets the underlying file buffer. Handle must be valid.
37
44
  DUCKDB_API FileBuffer &GetFileBuffer();
38
45
  //! Destroys the buffer handle
@@ -138,7 +138,7 @@ private:
138
138
  StringStatsData string_data;
139
139
  } stats_union;
140
140
  //! Child stats (for LIST and STRUCT)
141
- unique_ptr<BaseStatistics[]> child_stats;
141
+ unsafe_array_ptr<BaseStatistics> child_stats;
142
142
  };
143
143
 
144
144
  } // namespace duckdb
@@ -44,7 +44,7 @@ struct RowGroupAppendState {
44
44
  //! The current row_group we are appending to
45
45
  RowGroup *row_group;
46
46
  //! The column append states
47
- unique_ptr<ColumnAppendState[]> states;
47
+ unsafe_array_ptr<ColumnAppendState> states;
48
48
  //! Offset within the row_group
49
49
  idx_t offset_in_row_group;
50
50
  };
@@ -99,7 +99,7 @@ public:
99
99
  //! The maximum row within the row group
100
100
  idx_t max_row_group_row;
101
101
  //! Child column scans
102
- unique_ptr<ColumnScanState[]> column_scans;
102
+ unsafe_array_ptr<ColumnScanState> column_scans;
103
103
  //! Row group segment tree
104
104
  RowGroupSegmentTree *row_groups;
105
105
  //! The total maximum row index
@@ -96,8 +96,8 @@ private:
96
96
 
97
97
  struct UpdateNodeData {
98
98
  unique_ptr<UpdateInfo> info;
99
- unique_ptr<sel_t[]> tuples;
100
- unique_ptr<data_t[]> tuple_data;
99
+ unsafe_array_ptr<sel_t> tuples;
100
+ unsafe_array_ptr<data_t> tuple_data;
101
101
  };
102
102
 
103
103
  struct UpdateNode {
@@ -35,7 +35,7 @@ bool JoinRelationSet::IsSubset(JoinRelationSet &super, JoinRelationSet &sub) {
35
35
  return false;
36
36
  }
37
37
 
38
- JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unique_ptr<idx_t[]> relations, idx_t count) {
38
+ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unsafe_array_ptr<idx_t> relations, idx_t count) {
39
39
  // now look it up in the tree
40
40
  reference<JoinRelationTreeNode> info(root);
41
41
  for (idx_t i = 0; i < count; i++) {
@@ -59,7 +59,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unique_ptr<idx_t[]> rel
59
59
  //! Create or get a JoinRelationSet from a single node with the given index
60
60
  JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
61
61
  // create a sorted vector of the relations
62
- auto relations = unique_ptr<idx_t[]>(new idx_t[1]);
62
+ auto relations = make_unsafe_array<idx_t>(1);
63
63
  relations[0] = index;
64
64
  idx_t count = 1;
65
65
  return GetJoinRelation(std::move(relations), count);
@@ -67,7 +67,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(idx_t index) {
67
67
 
68
68
  JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &bindings) {
69
69
  // create a sorted vector of the relations
70
- unique_ptr<idx_t[]> relations = bindings.empty() ? nullptr : unique_ptr<idx_t[]>(new idx_t[bindings.size()]);
70
+ unsafe_array_ptr<idx_t> relations = bindings.empty() ? nullptr : make_unsafe_array<idx_t>(bindings.size());
71
71
  idx_t count = 0;
72
72
  for (auto &entry : bindings) {
73
73
  relations[count++] = entry;
@@ -77,7 +77,7 @@ JoinRelationSet &JoinRelationSetManager::GetJoinRelation(unordered_set<idx_t> &b
77
77
  }
78
78
 
79
79
  JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelationSet &right) {
80
- auto relations = unique_ptr<idx_t[]>(new idx_t[left.count + right.count]);
80
+ auto relations = make_unsafe_array<idx_t>(left.count + right.count);
81
81
  idx_t count = 0;
82
82
  // move through the left and right relations, eliminating duplicates
83
83
  idx_t i = 0, j = 0;
@@ -113,7 +113,7 @@ JoinRelationSet &JoinRelationSetManager::Union(JoinRelationSet &left, JoinRelati
113
113
  }
114
114
 
115
115
  // JoinRelationSet *JoinRelationSetManager::Difference(JoinRelationSet *left, JoinRelationSet *right) {
116
- // auto relations = unique_ptr<idx_t[]>(new idx_t[left->count]);
116
+ // auto relations = unsafe_array_ptr<idx_t>(new idx_t[left->count]);
117
117
  // idx_t count = 0;
118
118
  // // move through the left and right relations
119
119
  // idx_t i = 0, j = 0;
@@ -195,18 +195,24 @@ void UnnestRewriter::UpdateRHSBindings(unique_ptr<LogicalOperator> *plan_ptr, un
195
195
  updater.replace_bindings.push_back(replace_binding);
196
196
  }
197
197
 
198
- // temporarily remove the BOUND_UNNEST and the child of the LOGICAL_UNNEST from the plan
198
+ // temporarily remove the BOUND_UNNESTs and the child of the LOGICAL_UNNEST from the plan
199
199
  D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
200
200
  auto &unnest = curr_op->get()->Cast<LogicalUnnest>();
201
- auto temp_bound_unnest = std::move(unnest.expressions[0]);
201
+ vector<unique_ptr<Expression>> temp_bound_unnests;
202
+ for (auto &temp_bound_unnest : unnest.expressions) {
203
+ temp_bound_unnests.push_back(std::move(temp_bound_unnest));
204
+ }
205
+ D_ASSERT(unnest.children.size() == 1);
202
206
  auto temp_unnest_child = std::move(unnest.children[0]);
203
207
  unnest.expressions.clear();
204
208
  unnest.children.clear();
205
209
  // update the bindings of the plan
206
210
  updater.VisitOperator(*plan_ptr->get());
207
211
  updater.replace_bindings.clear();
208
- // add the child again
209
- unnest.expressions.push_back(std::move(temp_bound_unnest));
212
+ // add the children again
213
+ for (auto &temp_bound_unnest : temp_bound_unnests) {
214
+ unnest.expressions.push_back(std::move(temp_bound_unnest));
215
+ }
210
216
  unnest.children.push_back(std::move(temp_unnest_child));
211
217
 
212
218
  // add the LHS expressions to each LOGICAL_PROJECTION
@@ -256,6 +262,7 @@ void UnnestRewriter::UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &update
256
262
  D_ASSERT(curr_op->get()->type == LogicalOperatorType::LOGICAL_UNNEST);
257
263
  auto &unnest = curr_op->get()->Cast<LogicalUnnest>();
258
264
 
265
+ D_ASSERT(unnest.children.size() == 1);
259
266
  auto unnest_child_cols = unnest.children[0]->GetColumnBindings();
260
267
  for (idx_t delim_col_idx = 0; delim_col_idx < delim_columns.size(); delim_col_idx++) {
261
268
  for (idx_t child_col_idx = 0; child_col_idx < unnest_child_cols.size(); child_col_idx++) {
@@ -268,8 +275,9 @@ void UnnestRewriter::UpdateBoundUnnestBindings(UnnestRewriterPlanUpdater &update
268
275
  }
269
276
 
270
277
  // update bindings
271
- D_ASSERT(unnest.expressions.size() == 1);
272
- updater.VisitExpression(&unnest.expressions[0]);
278
+ for (auto &unnest_expr : unnest.expressions) {
279
+ updater.VisitExpression(&unnest_expr);
280
+ }
273
281
  updater.replace_bindings.clear();
274
282
  }
275
283
 
@@ -30,7 +30,7 @@ InsertStatement::InsertStatement(const InsertStatement &other)
30
30
  : SQLStatement(other), select_statement(unique_ptr_cast<SQLStatement, SelectStatement>(
31
31
  other.select_statement ? other.select_statement->Copy() : nullptr)),
32
32
  columns(other.columns), table(other.table), schema(other.schema), catalog(other.catalog),
33
- default_values(other.default_values) {
33
+ default_values(other.default_values), column_order(other.column_order) {
34
34
  cte_map = other.cte_map.Copy();
35
35
  for (auto &expr : other.returning_list) {
36
36
  returning_list.emplace_back(expr->Copy());
@@ -81,6 +81,9 @@ string InsertStatement::ToString() const {
81
81
  if (table_ref && !table_ref->alias.empty()) {
82
82
  result += StringUtil::Format(" AS %s", KeywordHelper::WriteOptionallyQuoted(table_ref->alias));
83
83
  }
84
+ if (column_order == InsertColumnOrder::INSERT_BY_NAME) {
85
+ result += " BY NAME";
86
+ }
84
87
  if (!columns.empty()) {
85
88
  result += " (";
86
89
  for (idx_t i = 0; i < columns.size(); i++) {
@@ -67,6 +67,16 @@ unique_ptr<InsertStatement> Transformer::TransformInsert(duckdb_libpgquery::PGNo
67
67
  result->on_conflict_info = DummyOnConflictClause(stmt->onConflictAlias, result->schema);
68
68
  result->table_ref = TransformRangeVar(stmt->relation);
69
69
  }
70
+ switch (stmt->insert_column_order) {
71
+ case duckdb_libpgquery::PG_INSERT_BY_POSITION:
72
+ result->column_order = InsertColumnOrder::INSERT_BY_POSITION;
73
+ break;
74
+ case duckdb_libpgquery::PG_INSERT_BY_NAME:
75
+ result->column_order = InsertColumnOrder::INSERT_BY_NAME;
76
+ break;
77
+ default:
78
+ throw InternalException("Unrecognized insert column order in TransformInsert");
79
+ }
70
80
  result->catalog = qname.catalog;
71
81
  return result;
72
82
  }
@@ -406,6 +406,26 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
406
406
  // Add CTEs as bindable
407
407
  AddCTEMap(stmt.cte_map);
408
408
 
409
+ auto values_list = stmt.GetValuesList();
410
+
411
+ // bind the root select node (if any)
412
+ BoundStatement root_select;
413
+ if (stmt.column_order == InsertColumnOrder::INSERT_BY_NAME) {
414
+ if (values_list) {
415
+ throw BinderException("INSERT BY NAME can only be used when inserting from a SELECT statement");
416
+ }
417
+ if (!stmt.columns.empty()) {
418
+ throw BinderException("INSERT BY NAME cannot be combined with an explicit column list");
419
+ }
420
+ D_ASSERT(stmt.select_statement);
421
+ // INSERT BY NAME - generate the columns from the names of the SELECT statement
422
+ auto select_binder = Binder::CreateBinder(context, this);
423
+ root_select = select_binder->Bind(*stmt.select_statement);
424
+ MoveCorrelatedExpressions(*select_binder);
425
+
426
+ stmt.columns = root_select.names;
427
+ }
428
+
409
429
  vector<LogicalIndex> named_column_map;
410
430
  if (!stmt.columns.empty() || stmt.default_values) {
411
431
  // insertion statement specifies column list
@@ -413,6 +433,10 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
413
433
  // create a mapping of (list index) -> (column index)
414
434
  case_insensitive_map_t<idx_t> column_name_map;
415
435
  for (idx_t i = 0; i < stmt.columns.size(); i++) {
436
+ auto entry = column_name_map.insert(make_pair(stmt.columns[i], i));
437
+ if (!entry.second) {
438
+ throw BinderException("Duplicate column name \"%s\" in INSERT", stmt.columns[i]);
439
+ }
416
440
  column_name_map[stmt.columns[i]] = i;
417
441
  auto column_index = table.GetColumnIndex(stmt.columns[i]);
418
442
  if (column_index.index == COLUMN_IDENTIFIER_ROW_ID) {
@@ -436,8 +460,8 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
436
460
  }
437
461
  }
438
462
  } else {
439
- // No columns specified, assume insertion into all columns
440
- // Intentionally don't populate 'column_index_map' as an indication of this
463
+ // insert by position and no columns specified - insertion into all columns of the table
464
+ // intentionally don't populate 'column_index_map' as an indication of this
441
465
  for (auto &col : table.GetColumns().Physical()) {
442
466
  named_column_map.push_back(col.Logical());
443
467
  insert->expected_types.push_back(col.Type());
@@ -454,7 +478,6 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
454
478
  idx_t expected_columns = stmt.columns.empty() ? table.GetColumns().PhysicalColumnCount() : stmt.columns.size();
455
479
 
456
480
  // special case: check if we are inserting from a VALUES statement
457
- auto values_list = stmt.GetValuesList();
458
481
  if (values_list) {
459
482
  auto &expr_list = values_list->Cast<ExpressionListRef>();
460
483
  expr_list.expected_types.resize(expected_columns);
@@ -487,10 +510,12 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
487
510
  // parse select statement and add to logical plan
488
511
  unique_ptr<LogicalOperator> root;
489
512
  if (stmt.select_statement) {
490
- auto select_binder = Binder::CreateBinder(context, this);
491
- auto root_select = select_binder->Bind(*stmt.select_statement);
492
- MoveCorrelatedExpressions(*select_binder);
493
-
513
+ if (stmt.column_order == InsertColumnOrder::INSERT_BY_POSITION) {
514
+ auto select_binder = Binder::CreateBinder(context, this);
515
+ root_select = select_binder->Bind(*stmt.select_statement);
516
+ MoveCorrelatedExpressions(*select_binder);
517
+ }
518
+ // inserting from a select - check if the column count matches
494
519
  CheckInsertColumnCountMismatch(expected_columns, root_select.types.size(), !stmt.columns.empty(),
495
520
  table.name.c_str());
496
521
 
@@ -64,7 +64,7 @@ data_ptr_t ArenaAllocator::Allocate(idx_t len) {
64
64
  do {
65
65
  current_capacity *= 2;
66
66
  } while (current_capacity < len);
67
- auto new_chunk = make_uniq<ArenaChunk>(allocator, current_capacity);
67
+ auto new_chunk = make_unsafe_uniq<ArenaChunk>(allocator, current_capacity);
68
68
  if (head) {
69
69
  head->prev = new_chunk.get();
70
70
  new_chunk->next = std::move(head);
@@ -7,7 +7,8 @@ namespace duckdb {
7
7
  BufferHandle::BufferHandle() : handle(nullptr), node(nullptr) {
8
8
  }
9
9
 
10
- BufferHandle::BufferHandle(shared_ptr<BlockHandle> handle, FileBuffer *node) : handle(std::move(handle)), node(node) {
10
+ BufferHandle::BufferHandle(shared_ptr<BlockHandle> handle_p, FileBuffer *node_p)
11
+ : handle(std::move(handle_p)), node(node_p) {
11
12
  }
12
13
 
13
14
  BufferHandle::BufferHandle(BufferHandle &&other) noexcept {
@@ -29,16 +30,6 @@ bool BufferHandle::IsValid() const {
29
30
  return node != nullptr;
30
31
  }
31
32
 
32
- data_ptr_t BufferHandle::Ptr() const {
33
- D_ASSERT(IsValid());
34
- return node->buffer;
35
- }
36
-
37
- data_ptr_t BufferHandle::Ptr() {
38
- D_ASSERT(IsValid());
39
- return node->buffer;
40
- }
41
-
42
33
  void BufferHandle::Destroy() {
43
34
  if (!handle || !IsValid()) {
44
35
  return;
@@ -32,7 +32,7 @@ void WriteOverflowStringsToDisk::WriteString(string_t string, block_id_t &result
32
32
  MiniZStream s;
33
33
  size_t compressed_size = 0;
34
34
  compressed_size = s.MaxCompressedLength(uncompressed_size);
35
- auto compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
35
+ auto compressed_buf = make_unsafe_array<data_t>(compressed_size);
36
36
  s.Compress((const char *)string.GetData(), uncompressed_size, (char *)compressed_buf.get(), &compressed_size);
37
37
  string_t compressed_string((const char *)compressed_buf.get(), compressed_size);
38
38
 
@@ -292,13 +292,13 @@ string_t UncompressedStringStorage::ReadOverflowString(ColumnSegment &segment, V
292
292
  offset += 2 * sizeof(uint32_t);
293
293
 
294
294
  data_ptr_t decompression_ptr;
295
- unique_ptr<data_t[]> decompression_buffer;
295
+ unsafe_array_ptr<data_t> decompression_buffer;
296
296
 
297
297
  // If string is in single block we decompress straight from it, else we copy first
298
298
  if (remaining <= Storage::BLOCK_SIZE - sizeof(block_id_t) - offset) {
299
299
  decompression_ptr = handle.Ptr() + offset;
300
300
  } else {
301
- decompression_buffer = unique_ptr<data_t[]>(new data_t[compressed_size]);
301
+ decompression_buffer = make_unsafe_array<data_t>(compressed_size);
302
302
  auto target_ptr = decompression_buffer.get();
303
303
 
304
304
  // now append the string to the single buffer
@@ -7,7 +7,7 @@
7
7
  namespace duckdb {
8
8
 
9
9
  void ListStats::Construct(BaseStatistics &stats) {
10
- stats.child_stats = unique_ptr<BaseStatistics[]>(new BaseStatistics[1]);
10
+ stats.child_stats = unsafe_array_ptr<BaseStatistics>(new BaseStatistics[1]);
11
11
  BaseStatistics::Construct(stats.child_stats[0], ListType::GetChildType(stats.GetType()));
12
12
  }
13
13