duckdb 0.7.2-dev3441.0 → 0.7.2-dev3515.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/package.json +2 -2
  2. package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
  3. package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
  4. package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
  5. package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
  6. package/src/duckdb/src/common/file_system.cpp +2 -2
  7. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  8. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  9. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
  10. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  11. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +4 -3
  12. package/src/duckdb/src/common/serializer.cpp +1 -1
  13. package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
  14. package/src/duckdb/src/common/string_util.cpp +2 -2
  15. package/src/duckdb/src/common/types/bit.cpp +2 -2
  16. package/src/duckdb/src/common/types/blob.cpp +2 -2
  17. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  18. package/src/duckdb/src/common/types/date.cpp +1 -1
  19. package/src/duckdb/src/common/types/decimal.cpp +2 -2
  20. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +14 -2
  21. package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
  22. package/src/duckdb/src/common/types/time.cpp +1 -1
  23. package/src/duckdb/src/common/types/vector.cpp +7 -7
  24. package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
  25. package/src/duckdb/src/common/windows_util.cpp +2 -2
  26. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +6 -3
  27. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -5
  28. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
  29. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  30. package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
  31. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
  32. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  33. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  34. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  35. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -7
  36. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +4 -41
  37. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
  38. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
  39. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
  40. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -4
  41. package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
  42. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
  43. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -1
  44. package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
  45. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  46. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  47. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  48. package/src/duckdb/src/function/table/read_csv.cpp +43 -35
  49. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  50. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  51. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -3
  52. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -5
  53. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -9
  54. package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +15 -0
  55. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +1 -0
  56. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  58. package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +3 -2
  59. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -3
  60. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +11 -6
  61. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -1
  62. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
  64. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +4 -4
  65. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +53 -22
  66. package/src/duckdb/src/include/duckdb/common/vector.hpp +5 -2
  67. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +4 -4
  69. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
  70. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
  72. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +0 -2
  73. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
  74. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +27 -127
  75. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
  76. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
  77. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  78. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +2 -4
  79. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
  80. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +5 -0
  81. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -2
  82. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +9 -2
  83. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
  85. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +1 -1
  86. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
  87. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
  88. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +14 -6
  89. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -1
  90. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +10 -0
  91. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -7
  92. package/src/duckdb/src/storage/arena_allocator.cpp +1 -1
  93. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -11
  94. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
  95. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
  96. package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
  97. package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
  98. package/src/duckdb/src/storage/table/row_group.cpp +2 -2
  99. package/src/duckdb/src/storage/table/update_segment.cpp +7 -6
  100. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -2
  101. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +9 -0
  102. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +13 -12
  103. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  104. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12537 -12415
  105. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  106. package/src/statement.cpp +15 -13
package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp

@@ -34,7 +34,7 @@ string BaseCSVReader::GetLineNumberStr(idx_t line_error, bool is_line_estimated,
 
 BaseCSVReader::BaseCSVReader(ClientContext &context_p, BufferedCSVReaderOptions options_p,
                              const vector<LogicalType> &requested_types)
-    : context(context_p), fs(FileSystem::GetFileSystem(context)), allocator(Allocator::Get(context)),
+    : context(context_p), fs(FileSystem::GetFileSystem(context)), allocator(BufferAllocator::Get(context)),
       options(std::move(options_p)) {
 }
 
@@ -42,12 +42,7 @@ BaseCSVReader::~BaseCSVReader() {
 }
 
 unique_ptr<CSVFileHandle> BaseCSVReader::OpenCSV(const BufferedCSVReaderOptions &options_p) {
-	auto file_handle = fs.OpenFile(options_p.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
-	                               options_p.compression);
-	if (file_handle->CanSeek()) {
-		file_handle->Reset();
-	}
-	return make_uniq<CSVFileHandle>(std::move(file_handle));
+	return CSVFileHandle::OpenFile(fs, allocator, options_p.file_path, options_p.compression, true);
 }
 
 void BaseCSVReader::InitParseChunk(idx_t num_cols) {

package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp

@@ -239,18 +239,13 @@ void BufferedCSVReader::Initialize(const vector<LogicalType> &requested_types) {
 		if (return_types.empty()) {
 			throw InvalidInputException("Failed to detect column types from CSV: is the file a valid CSV file?");
 		}
-		if (cached_chunks.empty()) {
-			JumpToBeginning(options.skip_rows, options.header);
-		}
+		JumpToBeginning(options.skip_rows, options.header);
 	} else {
 		return_types = requested_types;
 		ResetBuffer();
 		SkipRowsAndReadHeader(options.skip_rows, options.header);
 	}
 	InitParseChunk(return_types.size());
-	// we only need reset support during the automatic CSV type detection
-	// since reset support might require caching (in the case of streams), we disable it for the remainder
-	file_handle->DisableReset();
 }
 
 void BufferedCSVReader::ResetBuffer() {
@@ -262,13 +257,7 @@ void BufferedCSVReader::ResetBuffer() {
 }
 
 void BufferedCSVReader::ResetStream() {
-	if (!file_handle->CanSeek()) {
-		// seeking to the beginning appears to not be supported in all compiler/os-scenarios,
-		// so we have to create a new stream source here for now
-		file_handle->Reset();
-	} else {
-		file_handle->Seek(0);
-	}
+	file_handle->Reset();
 	linenr = 0;
 	linenr_estimated = false;
 	bytes_per_line_avg = 0;
@@ -332,7 +321,7 @@ bool BufferedCSVReader::JumpToNextSample() {
 
 	// if we deal with any other sources than plaintext files, jumping_samples can be tricky. In that case
 	// we just read x continuous chunks from the stream TODO: make jumps possible for zipfiles.
-	if (!file_handle->PlainFileSource() || !jumping_samples) {
+	if (!file_handle->OnDiskFile() || !jumping_samples) {
 		sample_chunk_idx++;
 		return true;
 	}
@@ -802,21 +791,6 @@ vector<LogicalType> BufferedCSVReader::RefineTypeDetection(const vector<LogicalT
 			}
 		}
 	}
-
-	if (!jumping_samples) {
-		if ((sample_chunk_idx)*options.sample_chunk_size <= options.buffer_size) {
-			// cache parse chunk
-			// create a new chunk and fill it with the remainder
-			auto chunk = make_uniq<DataChunk>();
-			auto parse_chunk_types = parse_chunk.GetTypes();
-			chunk->Move(parse_chunk);
-			cached_chunks.push(std::move(chunk));
-		} else {
-			while (!cached_chunks.empty()) {
-				cached_chunks.pop();
-			}
-		}
-	}
 }
 
 	// set sql types
@@ -1445,7 +1419,7 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start, idx_t &line_start) {
 		                            GetLineNumberStr(linenr, linenr_estimated));
 	}
 
-	buffer = unique_ptr<char[]>(new char[buffer_read_size + remaining + 1]);
+	buffer = make_unsafe_array<char>(buffer_read_size + remaining + 1);
 	buffer_size = remaining + buffer_read_size;
 	if (remaining > 0) {
 		// remaining from last buffer: copy it here
@@ -1474,17 +1448,6 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start, idx_t &line_start) {
 }
 
 void BufferedCSVReader::ParseCSV(DataChunk &insert_chunk) {
-	// if no auto-detect or auto-detect with jumping samples, we have nothing cached and start from the beginning
-	if (cached_chunks.empty()) {
-		cached_buffers.clear();
-	} else {
-		auto &chunk = cached_chunks.front();
-		parse_chunk.Move(*chunk);
-		cached_chunks.pop();
-		Flush(insert_chunk);
-		return;
-	}
-
 	string error_message;
 	if (!TryParseCSV(ParserMode::PARSING, insert_chunk, error_message)) {
 		throw InvalidInputException(error_message);
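
The cached_chunks/cached_buffers machinery removed above is superseded by the caching inside the new CSVFileHandle (next file), and the buffer allocation switches to make_unsafe_array<char>, a pattern that recurs through most of the remaining hunks (see helper.hpp and memory_safety.hpp in the file list). A minimal standalone sketch of what such a helper boils down to, assuming it wraps array new in a unique_ptr-style owner whose checked bounds/null assertions are disabled:

#include <cstddef>
#include <memory>

// Standalone analogue (name hypothetical): allocate T[n] on the heap and
// transfer ownership, with no extra zero-initialization pass for trivial types.
template <class T>
std::unique_ptr<T[]> make_unsafe_array_sketch(std::size_t n) {
	return std::unique_ptr<T[]>(new T[n]);
}
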

package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp

@@ -0,0 +1,158 @@
+#include "duckdb/execution/operator/persistent/csv_file_handle.hpp"
+
+namespace duckdb {
+
+CSVFileHandle::CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<FileHandle> file_handle_p,
+                             const string &path_p, FileCompressionType compression, bool enable_reset)
+    : fs(fs), allocator(allocator), file_handle(std::move(file_handle_p)), path(path_p), compression(compression),
+      reset_enabled(enable_reset) {
+	can_seek = file_handle->CanSeek();
+	on_disk_file = file_handle->OnDiskFile();
+	file_size = file_handle->GetFileSize();
+}
+
+unique_ptr<FileHandle> CSVFileHandle::OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
+                                                     FileCompressionType compression) {
+	auto file_handle = fs.OpenFile(path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
+	if (file_handle->CanSeek()) {
+		file_handle->Reset();
+	}
+	return file_handle;
+}
+
+unique_ptr<CSVFileHandle> CSVFileHandle::OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
+                                                  FileCompressionType compression, bool enable_reset) {
+	auto file_handle = CSVFileHandle::OpenFileHandle(fs, allocator, path, compression);
+	return make_uniq<CSVFileHandle>(fs, allocator, std::move(file_handle), path, compression, enable_reset);
+}
+
+bool CSVFileHandle::CanSeek() {
+	return can_seek;
+}
+
+void CSVFileHandle::Seek(idx_t position) {
+	if (!can_seek) {
+		throw InternalException("Cannot seek in this file");
+	}
+	file_handle->Seek(position);
+}
+
+idx_t CSVFileHandle::SeekPosition() {
+	if (!can_seek) {
+		throw InternalException("Cannot seek in this file");
+	}
+	return file_handle->SeekPosition();
+}
+
+void CSVFileHandle::Reset() {
+	requested_bytes = 0;
+	read_position = 0;
+	if (can_seek) {
+		// we can seek - reset the file handle
+		file_handle->Reset();
+	} else if (on_disk_file) {
+		// we cannot seek but it is an on-disk file - re-open the file
+		file_handle = CSVFileHandle::OpenFileHandle(fs, allocator, path, compression);
+	} else {
+		if (!reset_enabled) {
+			throw InternalException("Reset called but reset is not enabled for this CSV Handle");
+		}
+		read_position = 0;
+	}
+}
+bool CSVFileHandle::OnDiskFile() {
+	return on_disk_file;
+}
+
+idx_t CSVFileHandle::FileSize() {
+	return file_size;
+}
+
+bool CSVFileHandle::FinishedReading() {
+	return requested_bytes >= file_size;
+}
+
+idx_t CSVFileHandle::Read(void *buffer, idx_t nr_bytes) {
+	requested_bytes += nr_bytes;
+	if (on_disk_file || can_seek) {
+		// if this is a plain file source OR we can seek we are not caching anything
+		return file_handle->Read(buffer, nr_bytes);
+	}
+	// not a plain file source: we need to do some bookkeeping around the reset functionality
+	idx_t result_offset = 0;
+	if (read_position < buffer_size) {
+		// we need to read from our cached buffer
+		auto buffer_read_count = MinValue<idx_t>(nr_bytes, buffer_size - read_position);
+		memcpy(buffer, cached_buffer.get() + read_position, buffer_read_count);
+		result_offset += buffer_read_count;
+		read_position += buffer_read_count;
+		if (result_offset == nr_bytes) {
+			return nr_bytes;
+		}
+	} else if (!reset_enabled && cached_buffer.IsSet()) {
+		// reset is disabled, but we still have cached data
+		// we can remove any cached data
+		cached_buffer.Reset();
+		buffer_size = 0;
+		buffer_capacity = 0;
+		read_position = 0;
+	}
+	// we have data left to read from the file
+	// read directly into the buffer
+	auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
+	file_size = file_handle->GetFileSize();
+	read_position += bytes_read;
+	if (reset_enabled) {
+		// if reset caching is enabled, we need to cache the bytes that we have read
+		if (buffer_size + bytes_read >= buffer_capacity) {
+			// no space; first enlarge the buffer
+			buffer_capacity = MaxValue<idx_t>(NextPowerOfTwo(buffer_size + bytes_read), buffer_capacity * 2);
+
+			auto new_buffer = allocator.Allocate(buffer_capacity);
+			if (buffer_size > 0) {
+				memcpy(new_buffer.get(), cached_buffer.get(), buffer_size);
+			}
+			cached_buffer = std::move(new_buffer);
+		}
+		memcpy(cached_buffer.get() + buffer_size, (char *)buffer + result_offset, bytes_read);
+		buffer_size += bytes_read;
+	}
+
+	return result_offset + bytes_read;
+}
+
+string CSVFileHandle::ReadLine() {
+	bool carriage_return = false;
+	string result;
+	char buffer[1];
+	while (true) {
+		idx_t bytes_read = Read(buffer, 1);
+		if (bytes_read == 0) {
+			return result;
+		}
+		if (carriage_return) {
+			if (buffer[0] != '\n') {
+				if (!file_handle->CanSeek()) {
+					throw BinderException(
+					    "Carriage return newlines not supported when reading CSV files in which we cannot seek");
+				}
+				file_handle->Seek(file_handle->SeekPosition() - 1);
+				return result;
+			}
+		}
+		if (buffer[0] == '\n') {
+			return result;
+		}
+		if (buffer[0] != '\r') {
+			result += buffer[0];
+		} else {
+			carriage_return = true;
+		}
+	}
+}
+
+void CSVFileHandle::DisableReset() {
+	this->reset_enabled = false;
+}
+
+} // namespace duckdb
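
The new handle unifies rewind behavior: seekable sources seek, non-seekable on-disk files are re-opened, and pure streams replay reads from an in-memory cache that only grows while reset_enabled is true. A sketch of the intended lifecycle using only the methods defined above — the surrounding setup (where fs and allocator come from, and the SniffThenScan wrapper itself) is assumed:

#include "duckdb/execution/operator/persistent/csv_file_handle.hpp"

using namespace duckdb;

// Hypothetical driver: read a prefix for type detection, rewind, then scan.
static void SniffThenScan(FileSystem &fs, Allocator &allocator, const string &path) {
	// enable_reset=true: reads from a non-seekable stream are mirrored into the cache
	auto handle = CSVFileHandle::OpenFile(fs, allocator, path,
	                                      FileCompressionType::AUTO_DETECT, /*enable_reset=*/true);
	char buf[4096];
	handle->Read(buf, sizeof(buf)); // sniffing pass
	handle->Reset();                // seek, re-open, or replay the cache, depending on the source
	handle->DisableReset();         // bulk scan: stop caching from here on
}
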

package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp

@@ -19,7 +19,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
 	for (auto &aggr_expr : bound_pivot.aggregates) {
 		auto &aggr = (BoundAggregateExpression &)*aggr_expr;
 		// for each aggregate, initialize an empty aggregate state and finalize it immediately
-		auto state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
+		auto state = make_unsafe_array<data_t>(aggr.function.state_size());
 		aggr.function.initialize(state.get());
 		Vector state_vector(Value::POINTER((uintptr_t)state.get()));
 		Vector result_vector(aggr_expr->return_type);

package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp

@@ -23,11 +23,11 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(ClientContext &context, All
 	tuple_size = layout.GetRowWidth();
 
 	// allocate and null initialize the data
-	owned_data = unique_ptr<data_t[]>(new data_t[tuple_size * total_groups]);
+	owned_data = make_unsafe_array<data_t>(tuple_size * total_groups);
 	data = owned_data.get();
 
 	// set up the empty payloads for every tuple, and initialize the "occupied" flag to false
-	group_is_set = unique_ptr<bool[]>(new bool[total_groups]);
+	group_is_set = make_unsafe_array<bool>(total_groups);
 	memset(group_is_set.get(), 0, total_groups * sizeof(bool));
 
 	// initialize the hash table for each entry

package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp

@@ -334,7 +334,7 @@ public:
 	//! The current position to scan the HT for output tuples
 	idx_t ht_index;
 	//! The set of aggregate scan states
-	unique_ptr<TupleDataParallelScanState[]> ht_scan_states;
+	unsafe_array_ptr<TupleDataParallelScanState> ht_scan_states;
 	atomic<bool> initialized;
 	atomic<bool> finished;
 };
@@ -404,7 +404,7 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
 		for (idx_t i = 0; i < op.aggregates.size(); i++) {
 			D_ASSERT(op.aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
 			auto &aggr = op.aggregates[i]->Cast<BoundAggregateExpression>();
-			auto aggr_state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
+			auto aggr_state = make_unsafe_array<data_t>(aggr.function.state_size());
 			aggr.function.initialize(aggr_state.get());
 
 			AggregateInputData aggr_input_data(aggr.bind_info.get(), Allocator::DefaultAllocator());
@@ -433,8 +433,7 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
 	lock_guard<mutex> l(state.lock);
 	if (!state.initialized) {
 		auto &finalized_hts = gstate.finalized_hts;
-		state.ht_scan_states =
-		    unique_ptr<TupleDataParallelScanState[]>(new TupleDataParallelScanState[finalized_hts.size()]);
+		state.ht_scan_states = make_unsafe_array<TupleDataParallelScanState>(finalized_hts.size());
 
 		const auto &layout = gstate.finalized_hts[0]->GetDataCollection().GetLayout();
 		vector<column_t> column_ids;

package/src/duckdb/src/execution/window_segment_tree.cpp

@@ -309,7 +309,7 @@ void WindowSegmentTree::ConstructTree() {
 		level_nodes = (level_nodes + (TREE_FANOUT - 1)) / TREE_FANOUT;
 		internal_nodes += level_nodes;
 	} while (level_nodes > 1);
-	levels_flat_native = unique_ptr<data_t[]>(new data_t[internal_nodes * state.size()]);
+	levels_flat_native = make_unsafe_array<data_t>(internal_nodes * state.size());
 	levels_flat_start.push_back(0);
 
 	idx_t levels_flat_offset = 0;

package/src/duckdb/src/function/pragma/pragma_queries.cpp

@@ -139,7 +139,7 @@ string PragmaImportDatabase(ClientContext &context, const FunctionParameters &pa
 		auto handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
 		                          FileSystem::DEFAULT_COMPRESSION);
 		auto fsize = fs.GetFileSize(*handle);
-		auto buffer = unique_ptr<char[]>(new char[fsize]);
+		auto buffer = make_unsafe_array<char>(fsize);
 		fs.Read(*handle, buffer.get(), fsize);
 		auto query = string(buffer.get(), fsize);
 		// Replace the placeholder with the path provided to IMPORT

package/src/duckdb/src/function/scalar/strftime_format.cpp

@@ -408,7 +408,7 @@ string StrfTimeFormat::Format(timestamp_t timestamp, const string &format_str) {
 	auto time = Timestamp::GetTime(timestamp);
 
 	auto len = format.GetLength(date, time, 0, nullptr);
-	auto result = unique_ptr<char[]>(new char[len]);
+	auto result = make_unsafe_array<char>(len);
 	format.FormatString(date, time, result.get());
 	return string(result.get(), len);
 }

package/src/duckdb/src/function/scalar/string/concat.cpp

@@ -118,7 +118,7 @@ static void TemplatedConcatWS(DataChunk &args, string_t *sep_data, const Selecti
                               const SelectionVector &rsel, idx_t count, Vector &result) {
 	vector<idx_t> result_lengths(args.size(), 0);
 	vector<bool> has_results(args.size(), false);
-	auto orrified_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[args.ColumnCount() - 1]);
+	auto orrified_data = make_unsafe_array<UnifiedVectorFormat>(args.ColumnCount() - 1);
 	for (idx_t col_idx = 1; col_idx < args.ColumnCount(); col_idx++) {
 		args.data[col_idx].ToUnifiedFormat(args.size(), orrified_data[col_idx - 1]);
 	}

package/src/duckdb/src/function/scalar/string/like.cpp

@@ -395,11 +395,11 @@ bool ILikeOperatorFunction(string_t &str, string_t &pattern, char escape = '\0')
 
 	// lowercase both the str and the pattern
 	idx_t str_llength = LowerFun::LowerLength(str_data, str_size);
-	auto str_ldata = unique_ptr<char[]>(new char[str_llength]);
+	auto str_ldata = make_unsafe_array<char>(str_llength);
 	LowerFun::LowerCase(str_data, str_size, str_ldata.get());
 
 	idx_t pat_llength = LowerFun::LowerLength(pat_data, pat_size);
-	auto pat_ldata = unique_ptr<char[]>(new char[pat_llength]);
+	auto pat_ldata = make_unsafe_array<char>(pat_llength);
 	LowerFun::LowerCase(pat_data, pat_size, pat_ldata.get());
 	string_t str_lcase(str_ldata.get(), str_llength);
 	string_t pat_lcase(pat_ldata.get(), pat_llength);

package/src/duckdb/src/function/scalar/system/aggregate_export.cpp

@@ -36,12 +36,12 @@ struct ExportAggregateBindData : public FunctionData {
 struct CombineState : public FunctionLocalState {
 	idx_t state_size;
 
-	unique_ptr<data_t[]> state_buffer0, state_buffer1;
+	unsafe_array_ptr<data_t> state_buffer0, state_buffer1;
 	Vector state_vector0, state_vector1;
 
 	explicit CombineState(idx_t state_size_p)
-	    : state_size(state_size_p), state_buffer0(unique_ptr<data_t[]>(new data_t[state_size_p])),
-	      state_buffer1(unique_ptr<data_t[]>(new data_t[state_size_p])),
+	    : state_size(state_size_p), state_buffer0(make_unsafe_array<data_t>(state_size_p)),
+	      state_buffer1(make_unsafe_array<data_t>(state_size_p)),
 	      state_vector0(Value::POINTER((uintptr_t)state_buffer0.get())),
 	      state_vector1(Value::POINTER((uintptr_t)state_buffer1.get())) {
 	}
@@ -55,12 +55,12 @@ static unique_ptr<FunctionLocalState> InitCombineState(ExpressionState &state, c
 
 struct FinalizeState : public FunctionLocalState {
 	idx_t state_size;
-	unique_ptr<data_t[]> state_buffer;
+	unsafe_array_ptr<data_t> state_buffer;
 	Vector addresses;
 
 	explicit FinalizeState(idx_t state_size_p)
 	    : state_size(state_size_p),
-	      state_buffer(unique_ptr<data_t[]>(new data_t[STANDARD_VECTOR_SIZE * AlignValue(state_size_p)])),
+	      state_buffer(make_unsafe_array<data_t>(STANDARD_VECTOR_SIZE * AlignValue(state_size_p))),
 	      addresses(LogicalType::POINTER) {
 	}
 };

package/src/duckdb/src/function/table/copy_csv.cpp

@@ -78,7 +78,7 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &i
 	bind_data->is_simple = bind_data->options.delimiter.size() == 1 && bind_data->options.escape.size() == 1 &&
 	                       bind_data->options.quote.size() == 1;
 	if (bind_data->is_simple) {
-		bind_data->requires_quotes = unique_ptr<bool[]>(new bool[256]);
+		bind_data->requires_quotes = make_unsafe_array<bool>(256);
 		memset(bind_data->requires_quotes.get(), 0, sizeof(bool) * 256);
 		bind_data->requires_quotes['\n'] = true;
 		bind_data->requires_quotes['\r'] = true;
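
The requires_quotes table turns the per-byte quoting decision into a single array lookup. An illustrative sketch of how such a table is consulted when writing a value — the actual quoting logic in copy_csv.cpp is more involved:

#include <cstddef>

// Scan a value and report whether any byte forces quoting.
static bool NeedsQuotes(const char *data, std::size_t len, const bool requires_quotes[256]) {
	for (std::size_t i = 0; i < len; i++) {
		if (requires_quotes[static_cast<unsigned char>(data[i])]) {
			return true; // newline, carriage return, delimiter, or quote byte
		}
	}
	return false;
}
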

package/src/duckdb/src/function/table/read_csv.cpp

@@ -4,7 +4,6 @@
 #include "duckdb/main/database.hpp"
 #include "duckdb/common/string_util.hpp"
 #include "duckdb/common/enum_util.hpp"
-#include "duckdb/common/hive_partitioning.hpp"
 #include "duckdb/common/union_by_name.hpp"
 #include "duckdb/main/config.hpp"
 #include "duckdb/parser/expression/constant_expression.hpp"
@@ -15,7 +14,6 @@
 #include "duckdb/common/multi_file_reader.hpp"
 #include "duckdb/main/client_data.hpp"
 #include "duckdb/execution/operator/persistent/csv_line_info.hpp"
-
 #include <limits>
 
 namespace duckdb {
@@ -23,11 +21,8 @@ namespace duckdb {
 unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const string &file_path, FileCompressionType compression,
                                            ClientContext &context) {
 	auto &fs = FileSystem::GetFileSystem(context);
-	auto file_handle = fs.OpenFile(file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
-	if (file_handle->CanSeek()) {
-		file_handle->Reset();
-	}
-	return make_uniq<CSVFileHandle>(std::move(file_handle), false);
+	auto &allocator = BufferAllocator::Get(context);
+	return CSVFileHandle::OpenFile(fs, allocator, file_path, compression, false);
 }
 
 void ReadCSVData::FinalizeRead(ClientContext &context) {
@@ -238,14 +233,6 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 	} else {
 		result->reader_bind = MultiFileReader::BindOptions(options.file_options, result->files, return_types, names);
 	}
-	auto &fs = FileSystem::GetFileSystem(context);
-	for (auto &file : result->files) {
-		if (fs.IsPipe(file)) {
-			result->is_pipe = true;
-			result->single_threaded = true;
-			break;
-		}
-	}
 	result->return_types = return_types;
 	result->return_names = names;
 	result->FinalizeRead(context);
@@ -265,7 +252,7 @@ static unique_ptr<FunctionData> ReadCSVAutoBind(ClientContext &context, TableFun
 struct ParallelCSVGlobalState : public GlobalTableFunctionState {
 public:
 	ParallelCSVGlobalState(ClientContext &context, unique_ptr<CSVFileHandle> file_handle_p,
-	                       vector<string> &files_path_p, idx_t system_threads_p, idx_t buffer_size_p,
+	                       const vector<string> &files_path_p, idx_t system_threads_p, idx_t buffer_size_p,
	                       idx_t rows_to_skip, bool force_parallelism_p, vector<column_t> column_ids_p, bool has_header)
 	    : file_handle(std::move(file_handle_p)), system_threads(system_threads_p), buffer_size(buffer_size_p),
 	      force_parallelism(force_parallelism_p), column_ids(std::move(column_ids_p)),
@@ -278,6 +265,7 @@ public:
 		}
 		file_size = file_handle->FileSize();
 		first_file_size = file_size;
+		on_disk_file = file_handle->OnDiskFile();
 		bytes_read = 0;
 		if (buffer_size < file_size || file_size == 0) {
 			bytes_per_local_state = buffer_size / ParallelCSVGlobalState::MaxThreads();
@@ -335,7 +323,7 @@ public:
 
 	bool Finished();
 
-	double GetProgress(ReadCSVData &bind_data) const {
+	double GetProgress(const ReadCSVData &bind_data) const {
 		idx_t total_files = bind_data.files.size();
 
 		// get the progress WITHIN the current file
@@ -369,6 +357,8 @@ private:
 	idx_t bytes_per_local_state;
 	//! Size of first file
 	idx_t first_file_size;
+	//! Whether or not this is an on-disk file
+	bool on_disk_file = true;
 	//! Basically max number of threads in DuckDB
 	idx_t system_threads;
 	//! Size of the buffers
@@ -402,7 +392,7 @@ private:
 };
 
 idx_t ParallelCSVGlobalState::MaxThreads() const {
-	if (force_parallelism) {
+	if (force_parallelism || !on_disk_file) {
 		return system_threads;
 	}
 	idx_t one_mb = 1000000; // We initialize max one thread per Mb
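
Streams now opt out of the size-based thread cap, since FileSize() is unreliable for them. A standalone sketch of the one-thread-per-MB heuristic this method applies — the exact rounding and clamping in read_csv.cpp may differ:

#include <algorithm>
#include <cstdint>

static uint64_t MaxCsvThreadsSketch(uint64_t first_file_size, uint64_t system_threads,
                                    bool force_parallelism, bool on_disk_file) {
	if (force_parallelism || !on_disk_file) {
		// verify_parallelism is set, or the source reports no meaningful size
		return system_threads;
	}
	const uint64_t one_mb = 1000000; // at most one thread per MB of input
	return std::min(system_threads, std::max<uint64_t>(1, first_file_size / one_mb));
}
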
@@ -628,7 +618,7 @@ idx_t LineInfo::GetLine(idx_t batch_idx, idx_t line_error, idx_t file_idx, idx_t
 
 static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext &context,
                                                                   TableFunctionInitInput &input) {
-	auto &bind_data = (ReadCSVData &)*input.bind_data;
+	auto &bind_data = input.bind_data->CastNoConst<ReadCSVData>();
 	if (bind_data.files.empty()) {
 		// This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
 		return make_uniq<ParallelCSVGlobalState>();
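
From here on, the C-style (ReadCSVData &)* casts are replaced with the Cast<T>() / CastNoConst<T>() helpers on FunctionData. A standalone sketch of what such helpers typically look like — names below are illustrative, and DuckDB's real versions presumably add debug-build type assertions:

// Minimal stand-in for a FunctionData-style base class with checked casts.
struct FunctionDataBaseSketch {
	virtual ~FunctionDataBaseSketch() = default;

	template <class TARGET>
	TARGET &Cast() {
		return reinterpret_cast<TARGET &>(*this); // a debug build would assert the dynamic type first
	}
	template <class TARGET>
	const TARGET &Cast() const {
		return reinterpret_cast<const TARGET &>(*this);
	}
	// Used where the pipeline hands out a const pointer but init code must mutate
	// the bind data, making explicit what the old (T &)* casts did silently.
	template <class TARGET>
	TARGET &CastNoConst() const {
		return const_cast<TARGET &>(reinterpret_cast<const TARGET &>(*this));
	}
};
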
@@ -636,7 +626,15 @@ static unique_ptr<GlobalTableFunctionState> ParallelCSVInitGlobal(ClientContext
 	unique_ptr<CSVFileHandle> file_handle;
 
 	bind_data.options.file_path = bind_data.files[0];
-	file_handle = ReadCSV::OpenCSV(bind_data.options.file_path, bind_data.options.compression, context);
+
+	if (bind_data.initial_reader) {
+		file_handle = std::move(bind_data.initial_reader->file_handle);
+		file_handle->Reset();
+		file_handle->DisableReset();
+		bind_data.initial_reader.reset();
+	} else {
+		file_handle = ReadCSV::OpenCSV(bind_data.options.file_path, bind_data.options.compression, context);
+	}
 	return make_uniq<ParallelCSVGlobalState>(
 	    context, std::move(file_handle), bind_data.files, context.db->NumberOfThreads(), bind_data.options.buffer_size,
 	    bind_data.options.skip_rows, ClientConfig::GetConfig(context).verify_parallelism, input.column_ids,
@@ -738,7 +736,7 @@ struct SingleThreadedCSVState : public GlobalTableFunctionState {
 		return total_files;
 	}
 
-	double GetProgress(ReadCSVData &bind_data) const {
+	double GetProgress(const ReadCSVData &bind_data) const {
 		D_ASSERT(total_files == bind_data.files.size());
 		D_ASSERT(progress_in_files <= total_files * 100);
 		return (double(progress_in_files) / double(total_files));
@@ -746,6 +744,16 @@ struct SingleThreadedCSVState : public GlobalTableFunctionState {
 
 	unique_ptr<BufferedCSVReader> GetCSVReader(ClientContext &context, ReadCSVData &bind_data, idx_t &file_index,
 	                                           idx_t &total_size) {
+		auto reader = GetCSVReaderInternal(context, bind_data, file_index, total_size);
+		if (reader) {
+			reader->file_handle->DisableReset();
+		}
+		return reader;
+	}
+
+private:
+	unique_ptr<BufferedCSVReader> GetCSVReaderInternal(ClientContext &context, ReadCSVData &bind_data,
+	                                                   idx_t &file_index, idx_t &total_size) {
 		BufferedCSVReaderOptions options;
 		{
 			lock_guard<mutex> l(csv_lock);
@@ -799,14 +807,14 @@ public:
 
 static unique_ptr<GlobalTableFunctionState> SingleThreadedCSVInit(ClientContext &context,
                                                                   TableFunctionInitInput &input) {
-	auto &bind_data = (ReadCSVData &)*input.bind_data;
+	auto &bind_data = input.bind_data->CastNoConst<ReadCSVData>();
 	auto result = make_uniq<SingleThreadedCSVState>(bind_data.files.size());
 	if (bind_data.files.empty()) {
 		// This can happen when a filename based filter pushdown has eliminated all possible files for this scan.
 		return std::move(result);
 	} else {
 		bind_data.options.file_path = bind_data.files[0];
-		if (bind_data.initial_reader && bind_data.is_pipe) {
+		if (bind_data.initial_reader) {
 			// If this is a pipe and an initial reader already exists due to read_csv_auto
 			// We must re-use it, since we can't restart the reader due for it being a pipe.
 			result->initial_reader = std::move(bind_data.initial_reader);
@@ -904,7 +912,7 @@ static void SingleThreadedCSVFunction(ClientContext &context, TableFunctionInput
 // Read CSV Functions
 //===--------------------------------------------------------------------===//
 static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &context, TableFunctionInitInput &input) {
-	auto &bind_data = (ReadCSVData &)*input.bind_data;
+	auto &bind_data = input.bind_data->Cast<ReadCSVData>();
 	if (bind_data.single_threaded) {
 		return SingleThreadedCSVInit(context, input);
 	} else {
@@ -914,7 +922,7 @@ static unique_ptr<GlobalTableFunctionState> ReadCSVInitGlobal(ClientContext &con
 
 unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context, TableFunctionInitInput &input,
                                                      GlobalTableFunctionState *global_state_p) {
-	auto &csv_data = (ReadCSVData &)*input.bind_data;
+	auto &csv_data = input.bind_data->Cast<ReadCSVData>();
 	if (csv_data.single_threaded) {
 		return SingleThreadedReadCSVInitLocal(context, input, global_state_p);
 	} else {
@@ -923,7 +931,7 @@ unique_ptr<LocalTableFunctionState> ReadCSVInitLocal(ExecutionContext &context,
 }
 
 static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
-	auto &bind_data = (ReadCSVData &)*data_p.bind_data;
+	auto &bind_data = data_p.bind_data->Cast<ReadCSVData>();
 	if (bind_data.single_threaded) {
 		SingleThreadedCSVFunction(context, data_p, output);
 	} else {
@@ -933,7 +941,7 @@ static void ReadCSVFunction(ClientContext &context, TableFunctionInput &data_p,
 
 static idx_t CSVReaderGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
                                     LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state) {
-	auto &bind_data = (ReadCSVData &)*bind_data_p;
+	auto &bind_data = bind_data_p->Cast<ReadCSVData>();
 	if (bind_data.single_threaded) {
 		auto &data = local_state->Cast<SingleThreadedCSVLocalState>();
 		return data.file_index;
@@ -980,28 +988,28 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
 
 double CSVReaderProgress(ClientContext &context, const FunctionData *bind_data_p,
                          const GlobalTableFunctionState *global_state) {
-	auto &bind_data = (ReadCSVData &)*bind_data_p;
+	auto &bind_data = bind_data_p->Cast<ReadCSVData>();
 	if (bind_data.single_threaded) {
-		auto &data = (SingleThreadedCSVState &)*global_state;
+		auto &data = global_state->Cast<SingleThreadedCSVState>();
 		return data.GetProgress(bind_data);
 	} else {
-		auto &data = (const ParallelCSVGlobalState &)*global_state;
+		auto &data = global_state->Cast<ParallelCSVGlobalState>();
 		return data.GetProgress(bind_data);
 	}
 }
 
 void CSVComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p,
                               vector<unique_ptr<Expression>> &filters) {
-	auto data = (ReadCSVData *)bind_data_p;
+	auto &data = bind_data_p->Cast<ReadCSVData>();
 	auto reset_reader =
-	    MultiFileReader::ComplexFilterPushdown(context, data->files, data->options.file_options, get, filters);
+	    MultiFileReader::ComplexFilterPushdown(context, data.files, data.options.file_options, get, filters);
 	if (reset_reader) {
-		MultiFileReader::PruneReaders(*data);
+		MultiFileReader::PruneReaders(data);
 	}
 }
 
 unique_ptr<NodeStatistics> CSVReaderCardinality(ClientContext &context, const FunctionData *bind_data_p) {
-	auto &bind_data = (ReadCSVData &)*bind_data_p;
+	auto &bind_data = bind_data_p->Cast<ReadCSVData>();
 	idx_t per_file_cardinality = 0;
 	if (bind_data.initial_reader && bind_data.initial_reader->file_handle) {
 		auto estimated_row_width = (bind_data.csv_types.size() * 5);
@@ -1086,7 +1094,7 @@ void BufferedCSVReaderOptions::Deserialize(FieldReader &reader) {
 }
 
 static void CSVReaderSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const TableFunction &function) {
-	auto &bind_data = (ReadCSVData &)*bind_data_p;
+	auto &bind_data = bind_data_p->Cast<ReadCSVData>();
 	writer.WriteList<string>(bind_data.files);
 	writer.WriteRegularSerializableList<LogicalType>(bind_data.csv_types);
 	writer.WriteList<string>(bind_data.csv_names);