duckdb 0.7.2-dev3402.0 → 0.7.2-dev3515.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/package.json +2 -2
  2. package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
  3. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  4. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  5. package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
  6. package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
  7. package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
  8. package/src/duckdb/src/common/file_system.cpp +2 -2
  9. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  10. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  11. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
  12. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  13. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +4 -3
  14. package/src/duckdb/src/common/serializer.cpp +1 -1
  15. package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
  16. package/src/duckdb/src/common/string_util.cpp +2 -2
  17. package/src/duckdb/src/common/types/bit.cpp +2 -2
  18. package/src/duckdb/src/common/types/blob.cpp +2 -2
  19. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  20. package/src/duckdb/src/common/types/date.cpp +1 -1
  21. package/src/duckdb/src/common/types/decimal.cpp +2 -2
  22. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +14 -2
  23. package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
  24. package/src/duckdb/src/common/types/time.cpp +1 -1
  25. package/src/duckdb/src/common/types/vector.cpp +7 -7
  26. package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
  27. package/src/duckdb/src/common/windows_util.cpp +2 -2
  28. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +6 -3
  29. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -5
  30. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +10 -7
  31. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
  32. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  33. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +3 -0
  34. package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
  35. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
  36. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  37. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  38. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -0
  39. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  40. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -7
  41. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +4 -41
  42. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
  43. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
  44. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
  45. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -4
  46. package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
  47. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
  48. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -1
  49. package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
  50. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  51. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  52. package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
  53. package/src/duckdb/src/function/table/read_csv.cpp +43 -35
  54. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  55. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  56. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -3
  57. package/src/duckdb/src/include/duckdb/common/constants.hpp +0 -5
  58. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -9
  59. package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +15 -0
  60. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +1 -0
  61. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +3 -2
  64. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -3
  65. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +11 -6
  66. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -1
  67. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
  69. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +4 -4
  70. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +53 -22
  71. package/src/duckdb/src/include/duckdb/common/vector.hpp +5 -2
  72. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +4 -4
  74. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
  75. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +0 -2
  78. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
  79. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +27 -127
  80. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
  81. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
  82. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +2 -4
  84. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
  85. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +5 -0
  86. package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -2
  87. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -2
  88. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +9 -2
  89. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
  90. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
  91. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
  93. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -0
  94. package/src/duckdb/src/main/client_context.cpp +1 -0
  95. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -1
  96. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +25 -1
  97. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
  98. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +14 -6
  99. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -1
  100. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +10 -0
  101. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +64 -42
  102. package/src/duckdb/src/storage/arena_allocator.cpp +1 -1
  103. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -11
  104. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
  105. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
  106. package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
  107. package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
  108. package/src/duckdb/src/storage/table/row_group.cpp +2 -2
  109. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -25
  110. package/src/duckdb/src/storage/table/update_segment.cpp +7 -6
  111. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -2
  112. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +9 -0
  113. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +13 -12
  114. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  115. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +13050 -12885
  116. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  117. package/src/statement.cpp +15 -13
package/src/duckdb/src/execution/join_hashtable.cpp

@@ -142,7 +142,7 @@ static idx_t FilterNullValues(UnifiedVectorFormat &vdata, const SelectionVector
  return result_count;
  }

- idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unique_ptr<UnifiedVectorFormat[]> &key_data,
+ idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unsafe_array_ptr<UnifiedVectorFormat> &key_data,
  const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
  key_data = keys.ToUnifiedFormat();

@@ -197,7 +197,7 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
  }

  // prepare the keys for processing
- unique_ptr<UnifiedVectorFormat[]> key_data;
+ unsafe_array_ptr<UnifiedVectorFormat> key_data;
  const SelectionVector *current_sel;
  SelectionVector sel(STANDARD_VECTOR_SIZE);
  idx_t added_count = PrepareKeys(keys, key_data, current_sel, sel, true);

@@ -332,7 +332,7 @@ unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys
  auto ss = make_uniq<ScanStructure>(*this);

  if (join_type != JoinType::INNER) {
- ss->found_match = unique_ptr<bool[]>(new bool[STANDARD_VECTOR_SIZE]);
+ ss->found_match = make_unsafe_array<bool>(STANDARD_VECTOR_SIZE);
  memset(ss->found_match.get(), 0, sizeof(bool) * STANDARD_VECTOR_SIZE);
  }
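Note: most of the remaining hunks in this diff apply the same mechanical change seen above: raw array allocations of the form unique_ptr<T[]>(new T[n]) become make_unsafe_array<T>(n), which returns the new unsafe_array_ptr<T> alias (see the changes to duckdb/common/unique_ptr.hpp and the new duckdb/common/memory_safety.hpp in the file list). A minimal before/after sketch, using only the helpers visible in this diff; the surrounding variable names are illustrative:

    // before: raw array allocation wrapped directly in a unique_ptr
    auto old_style = unique_ptr<bool[]>(new bool[STANDARD_VECTOR_SIZE]);
    // after: the same allocation via the new helper; "unsafe" here presumably
    // means the returned unsafe_array_ptr skips the extra checks of DuckDB's
    // memory-safe wrappers while keeping identical ownership semantics
    auto new_style = make_unsafe_array<bool>(STANDARD_VECTOR_SIZE);
    memset(new_style.get(), 0, sizeof(bool) * STANDARD_VECTOR_SIZE);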
package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp

@@ -39,7 +39,7 @@ struct AggregateState {
  for (auto &aggregate : aggregate_expressions) {
  D_ASSERT(aggregate->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
  auto &aggr = aggregate->Cast<BoundAggregateExpression>();
- auto state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
+ auto state = make_unsafe_array<data_t>(aggr.function.state_size());
  aggr.function.initialize(state.get());
  aggregates.push_back(std::move(state));
  bind_data.push_back(aggr.bind_info.get());

@@ -69,7 +69,7 @@ struct AggregateState {
  }

  //! The aggregate values
- vector<unique_ptr<data_t[]>> aggregates;
+ vector<unsafe_array_ptr<data_t>> aggregates;
  //! The bind data
  vector<FunctionData *> bind_data;
  //! The destructors

package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp

@@ -10,7 +10,7 @@ void OuterJoinMarker::Initialize(idx_t count_p) {
  return;
  }
  this->count = count_p;
- found_match = unique_ptr<bool[]>(new bool[count]);
+ found_match = make_unsafe_array<bool>(count);
  Reset();
  }

package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp

@@ -25,7 +25,7 @@ bool PerfectHashJoinExecutor::BuildPerfectHashTable(LogicalType &key_type) {
  }

  // and for duplicate_checking
- bitmap_build_idx = unique_ptr<bool[]>(new bool[build_size]);
+ bitmap_build_idx = make_unsafe_array<bool>(build_size);
  memset(bitmap_build_idx.get(), 0, sizeof(bool) * build_size); // set false

  // Now fill columns with build data

package/src/duckdb/src/execution/operator/join/physical_index_join.cpp

@@ -213,6 +213,8 @@ OperatorResultType PhysicalIndexJoin::ExecuteInternal(ExecutionContext &context,
  state.lhs_idx = 0;
  state.rhs_idx = 0;
  state.first_fetch = true;
+ // reset the LHS chunk to reset the validity masks
+ state.join_keys.Reset();
  return OperatorResultType::NEED_MORE_INPUT;
  }
  //! Output vectors

package/src/duckdb/src/execution/operator/join/physical_range_join.cpp

@@ -72,7 +72,7 @@ void PhysicalRangeJoin::GlobalSortedTable::Combine(LocalSortedTable &ltable) {
  }

  void PhysicalRangeJoin::GlobalSortedTable::IntializeMatches() {
- found_match = unique_ptr<bool[]>(new bool[Count()]);
+ found_match = make_unsafe_array<bool>(Count());
  memset(found_match.get(), 0, sizeof(bool) * Count());
  }
package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp

@@ -34,7 +34,7 @@ string BaseCSVReader::GetLineNumberStr(idx_t line_error, bool is_line_estimated,

  BaseCSVReader::BaseCSVReader(ClientContext &context_p, BufferedCSVReaderOptions options_p,
  const vector<LogicalType> &requested_types)
- : context(context_p), fs(FileSystem::GetFileSystem(context)), allocator(Allocator::Get(context)),
+ : context(context_p), fs(FileSystem::GetFileSystem(context)), allocator(BufferAllocator::Get(context)),
  options(std::move(options_p)) {
  }

@@ -42,12 +42,7 @@ BaseCSVReader::~BaseCSVReader() {
  }

  unique_ptr<CSVFileHandle> BaseCSVReader::OpenCSV(const BufferedCSVReaderOptions &options_p) {
- auto file_handle = fs.OpenFile(options_p.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
- options_p.compression);
- if (file_handle->CanSeek()) {
- file_handle->Reset();
- }
- return make_uniq<CSVFileHandle>(std::move(file_handle));
+ return CSVFileHandle::OpenFile(fs, allocator, options_p.file_path, options_p.compression, true);
  }

  void BaseCSVReader::InitParseChunk(idx_t num_cols) {

package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp

@@ -239,18 +239,13 @@ void BufferedCSVReader::Initialize(const vector<LogicalType> &requested_types) {
  if (return_types.empty()) {
  throw InvalidInputException("Failed to detect column types from CSV: is the file a valid CSV file?");
  }
- if (cached_chunks.empty()) {
- JumpToBeginning(options.skip_rows, options.header);
- }
+ JumpToBeginning(options.skip_rows, options.header);
  } else {
  return_types = requested_types;
  ResetBuffer();
  SkipRowsAndReadHeader(options.skip_rows, options.header);
  }
  InitParseChunk(return_types.size());
- // we only need reset support during the automatic CSV type detection
- // since reset support might require caching (in the case of streams), we disable it for the remainder
- file_handle->DisableReset();
  }

  void BufferedCSVReader::ResetBuffer() {

@@ -262,13 +257,7 @@ void BufferedCSVReader::ResetBuffer() {
  }

  void BufferedCSVReader::ResetStream() {
- if (!file_handle->CanSeek()) {
- // seeking to the beginning appears to not be supported in all compiler/os-scenarios,
- // so we have to create a new stream source here for now
- file_handle->Reset();
- } else {
- file_handle->Seek(0);
- }
+ file_handle->Reset();
  linenr = 0;
  linenr_estimated = false;
  bytes_per_line_avg = 0;

@@ -332,7 +321,7 @@ bool BufferedCSVReader::JumpToNextSample() {

  // if we deal with any other sources than plaintext files, jumping_samples can be tricky. In that case
  // we just read x continuous chunks from the stream TODO: make jumps possible for zipfiles.
- if (!file_handle->PlainFileSource() || !jumping_samples) {
+ if (!file_handle->OnDiskFile() || !jumping_samples) {
  sample_chunk_idx++;
  return true;
  }

@@ -802,21 +791,6 @@ vector<LogicalType> BufferedCSVReader::RefineTypeDetection(const vector<LogicalT
  }
  }
  }
-
- if (!jumping_samples) {
- if ((sample_chunk_idx)*options.sample_chunk_size <= options.buffer_size) {
- // cache parse chunk
- // create a new chunk and fill it with the remainder
- auto chunk = make_uniq<DataChunk>();
- auto parse_chunk_types = parse_chunk.GetTypes();
- chunk->Move(parse_chunk);
- cached_chunks.push(std::move(chunk));
- } else {
- while (!cached_chunks.empty()) {
- cached_chunks.pop();
- }
- }
- }
  }

  // set sql types

@@ -1445,7 +1419,7 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start, idx_t &line_start) {
  GetLineNumberStr(linenr, linenr_estimated));
  }

- buffer = unique_ptr<char[]>(new char[buffer_read_size + remaining + 1]);
+ buffer = make_unsafe_array<char>(buffer_read_size + remaining + 1);
  buffer_size = remaining + buffer_read_size;
  if (remaining > 0) {
  // remaining from last buffer: copy it here

@@ -1474,17 +1448,6 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start, idx_t &line_start) {
  }

  void BufferedCSVReader::ParseCSV(DataChunk &insert_chunk) {
- // if no auto-detect or auto-detect with jumping samples, we have nothing cached and start from the beginning
- if (cached_chunks.empty()) {
- cached_buffers.clear();
- } else {
- auto &chunk = cached_chunks.front();
- parse_chunk.Move(*chunk);
- cached_chunks.pop();
- Flush(insert_chunk);
- return;
- }
-
  string error_message;
  if (!TryParseCSV(ParserMode::PARSING, insert_chunk, error_message)) {
  throw InvalidInputException(error_message);
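The removals above pair with the new file below: the stream caching and reset bookkeeping that BufferedCSVReader previously handled itself (cached_chunks, cached_buffers, the manual CanSeek/Reset branching) now lives inside CSVFileHandle. A hypothetical usage sketch of the new lifecycle, using only signatures shown in this diff (the variable names are illustrative):

    // open with reset support enabled, as BaseCSVReader::OpenCSV now does
    // (the trailing `true` is the enable_reset flag)
    auto handle = CSVFileHandle::OpenFile(fs, allocator, options.file_path,
                                          options.compression, true);
    // type detection may need to re-read the stream from the start; for
    // sources that can neither seek nor be re-opened, Read() has cached
    // the consumed bytes so this replays them
    handle->Reset();
    // a caller that no longer needs reset support can drop the cache;
    // the next Read() then frees the cached buffer
    handle->DisableReset();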
package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp

@@ -0,0 +1,158 @@
+ #include "duckdb/execution/operator/persistent/csv_file_handle.hpp"
+
+ namespace duckdb {
+
+ CSVFileHandle::CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<FileHandle> file_handle_p,
+ const string &path_p, FileCompressionType compression, bool enable_reset)
+ : fs(fs), allocator(allocator), file_handle(std::move(file_handle_p)), path(path_p), compression(compression),
+ reset_enabled(enable_reset) {
+ can_seek = file_handle->CanSeek();
+ on_disk_file = file_handle->OnDiskFile();
+ file_size = file_handle->GetFileSize();
+ }
+
+ unique_ptr<FileHandle> CSVFileHandle::OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
+ FileCompressionType compression) {
+ auto file_handle = fs.OpenFile(path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
+ if (file_handle->CanSeek()) {
+ file_handle->Reset();
+ }
+ return file_handle;
+ }
+
+ unique_ptr<CSVFileHandle> CSVFileHandle::OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
+ FileCompressionType compression, bool enable_reset) {
+ auto file_handle = CSVFileHandle::OpenFileHandle(fs, allocator, path, compression);
+ return make_uniq<CSVFileHandle>(fs, allocator, std::move(file_handle), path, compression, enable_reset);
+ }
+
+ bool CSVFileHandle::CanSeek() {
+ return can_seek;
+ }
+
+ void CSVFileHandle::Seek(idx_t position) {
+ if (!can_seek) {
+ throw InternalException("Cannot seek in this file");
+ }
+ file_handle->Seek(position);
+ }
+
+ idx_t CSVFileHandle::SeekPosition() {
+ if (!can_seek) {
+ throw InternalException("Cannot seek in this file");
+ }
+ return file_handle->SeekPosition();
+ }
+
+ void CSVFileHandle::Reset() {
+ requested_bytes = 0;
+ read_position = 0;
+ if (can_seek) {
+ // we can seek - reset the file handle
+ file_handle->Reset();
+ } else if (on_disk_file) {
+ // we cannot seek but it is an on-disk file - re-open the file
+ file_handle = CSVFileHandle::OpenFileHandle(fs, allocator, path, compression);
+ } else {
+ if (!reset_enabled) {
+ throw InternalException("Reset called but reset is not enabled for this CSV Handle");
+ }
+ read_position = 0;
+ }
+ }
+ bool CSVFileHandle::OnDiskFile() {
+ return on_disk_file;
+ }
+
+ idx_t CSVFileHandle::FileSize() {
+ return file_size;
+ }
+
+ bool CSVFileHandle::FinishedReading() {
+ return requested_bytes >= file_size;
+ }
+
+ idx_t CSVFileHandle::Read(void *buffer, idx_t nr_bytes) {
+ requested_bytes += nr_bytes;
+ if (on_disk_file || can_seek) {
+ // if this is a plain file source OR we can seek we are not caching anything
+ return file_handle->Read(buffer, nr_bytes);
+ }
+ // not a plain file source: we need to do some bookkeeping around the reset functionality
+ idx_t result_offset = 0;
+ if (read_position < buffer_size) {
+ // we need to read from our cached buffer
+ auto buffer_read_count = MinValue<idx_t>(nr_bytes, buffer_size - read_position);
+ memcpy(buffer, cached_buffer.get() + read_position, buffer_read_count);
+ result_offset += buffer_read_count;
+ read_position += buffer_read_count;
+ if (result_offset == nr_bytes) {
+ return nr_bytes;
+ }
+ } else if (!reset_enabled && cached_buffer.IsSet()) {
+ // reset is disabled, but we still have cached data
+ // we can remove any cached data
+ cached_buffer.Reset();
+ buffer_size = 0;
+ buffer_capacity = 0;
+ read_position = 0;
+ }
+ // we have data left to read from the file
+ // read directly into the buffer
+ auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
+ file_size = file_handle->GetFileSize();
+ read_position += bytes_read;
+ if (reset_enabled) {
+ // if reset caching is enabled, we need to cache the bytes that we have read
+ if (buffer_size + bytes_read >= buffer_capacity) {
+ // no space; first enlarge the buffer
+ buffer_capacity = MaxValue<idx_t>(NextPowerOfTwo(buffer_size + bytes_read), buffer_capacity * 2);
+
+ auto new_buffer = allocator.Allocate(buffer_capacity);
+ if (buffer_size > 0) {
+ memcpy(new_buffer.get(), cached_buffer.get(), buffer_size);
+ }
+ cached_buffer = std::move(new_buffer);
+ }
+ memcpy(cached_buffer.get() + buffer_size, (char *)buffer + result_offset, bytes_read);
+ buffer_size += bytes_read;
+ }
+
+ return result_offset + bytes_read;
+ }
+
+ string CSVFileHandle::ReadLine() {
+ bool carriage_return = false;
+ string result;
+ char buffer[1];
+ while (true) {
+ idx_t bytes_read = Read(buffer, 1);
+ if (bytes_read == 0) {
+ return result;
+ }
+ if (carriage_return) {
+ if (buffer[0] != '\n') {
+ if (!file_handle->CanSeek()) {
+ throw BinderException(
+ "Carriage return newlines not supported when reading CSV files in which we cannot seek");
+ }
+ file_handle->Seek(file_handle->SeekPosition() - 1);
+ return result;
+ }
+ }
+ if (buffer[0] == '\n') {
+ return result;
+ }
+ if (buffer[0] != '\r') {
+ result += buffer[0];
+ } else {
+ carriage_return = true;
+ }
+ }
+ }
+
+ void CSVFileHandle::DisableReset() {
+ this->reset_enabled = false;
+ }
+
+ } // namespace duckdb
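One detail of Read() worth noting: when reset caching is enabled, the cache grows to the larger of the next power of two that fits the new total and twice the current capacity, so a long run of small reads triggers only O(log n) reallocations. A standalone sketch of that growth rule (next_power_of_two stands in for duckdb::NextPowerOfTwo here, purely for illustration):

    #include <algorithm>
    #include <cstdint>

    // mimics duckdb::NextPowerOfTwo for the purpose of this illustration
    static uint64_t next_power_of_two(uint64_t v) {
        if (v <= 1) {
            return 1;
        }
        v--;
        v |= v >> 1; v |= v >> 2; v |= v >> 4;
        v |= v >> 8; v |= v >> 16; v |= v >> 32;
        return v + 1;
    }

    // same formula as the hunk above: grow when size + read reaches capacity
    static uint64_t grow_capacity(uint64_t buffer_size, uint64_t bytes_read, uint64_t capacity) {
        return std::max(next_power_of_two(buffer_size + bytes_read), capacity * 2);
    }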
package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp

@@ -19,7 +19,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
  for (auto &aggr_expr : bound_pivot.aggregates) {
  auto &aggr = (BoundAggregateExpression &)*aggr_expr;
  // for each aggregate, initialize an empty aggregate state and finalize it immediately
- auto state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
+ auto state = make_unsafe_array<data_t>(aggr.function.state_size());
  aggr.function.initialize(state.get());
  Vector state_vector(Value::POINTER((uintptr_t)state.get()));
  Vector result_vector(aggr_expr->return_type);

package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp

@@ -23,11 +23,11 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(ClientContext &context, All
  tuple_size = layout.GetRowWidth();

  // allocate and null initialize the data
- owned_data = unique_ptr<data_t[]>(new data_t[tuple_size * total_groups]);
+ owned_data = make_unsafe_array<data_t>(tuple_size * total_groups);
  data = owned_data.get();

  // set up the empty payloads for every tuple, and initialize the "occupied" flag to false
- group_is_set = unique_ptr<bool[]>(new bool[total_groups]);
+ group_is_set = make_unsafe_array<bool>(total_groups);
  memset(group_is_set.get(), 0, total_groups * sizeof(bool));

  // initialize the hash table for each entry

package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp

@@ -334,7 +334,7 @@ public:
  //! The current position to scan the HT for output tuples
  idx_t ht_index;
  //! The set of aggregate scan states
- unique_ptr<TupleDataParallelScanState[]> ht_scan_states;
+ unsafe_array_ptr<TupleDataParallelScanState> ht_scan_states;
  atomic<bool> initialized;
  atomic<bool> finished;
  };

@@ -404,7 +404,7 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
  for (idx_t i = 0; i < op.aggregates.size(); i++) {
  D_ASSERT(op.aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
  auto &aggr = op.aggregates[i]->Cast<BoundAggregateExpression>();
- auto aggr_state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
+ auto aggr_state = make_unsafe_array<data_t>(aggr.function.state_size());
  aggr.function.initialize(aggr_state.get());

  AggregateInputData aggr_input_data(aggr.bind_info.get(), Allocator::DefaultAllocator());

@@ -433,8 +433,7 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
  lock_guard<mutex> l(state.lock);
  if (!state.initialized) {
  auto &finalized_hts = gstate.finalized_hts;
- state.ht_scan_states =
- unique_ptr<TupleDataParallelScanState[]>(new TupleDataParallelScanState[finalized_hts.size()]);
+ state.ht_scan_states = make_unsafe_array<TupleDataParallelScanState>(finalized_hts.size());

  const auto &layout = gstate.finalized_hts[0]->GetDataCollection().GetLayout();
  vector<column_t> column_ids;

package/src/duckdb/src/execution/window_segment_tree.cpp

@@ -309,7 +309,7 @@ void WindowSegmentTree::ConstructTree() {
  level_nodes = (level_nodes + (TREE_FANOUT - 1)) / TREE_FANOUT;
  internal_nodes += level_nodes;
  } while (level_nodes > 1);
- levels_flat_native = unique_ptr<data_t[]>(new data_t[internal_nodes * state.size()]);
+ levels_flat_native = make_unsafe_array<data_t>(internal_nodes * state.size());
  levels_flat_start.push_back(0);

  idx_t levels_flat_offset = 0;

package/src/duckdb/src/function/pragma/pragma_queries.cpp

@@ -139,7 +139,7 @@ string PragmaImportDatabase(ClientContext &context, const FunctionParameters &pa
  auto handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
  FileSystem::DEFAULT_COMPRESSION);
  auto fsize = fs.GetFileSize(*handle);
- auto buffer = unique_ptr<char[]>(new char[fsize]);
+ auto buffer = make_unsafe_array<char>(fsize);
  fs.Read(*handle, buffer.get(), fsize);
  auto query = string(buffer.get(), fsize);
  // Replace the placeholder with the path provided to IMPORT

package/src/duckdb/src/function/scalar/strftime_format.cpp

@@ -408,7 +408,7 @@ string StrfTimeFormat::Format(timestamp_t timestamp, const string &format_str) {
  auto time = Timestamp::GetTime(timestamp);

  auto len = format.GetLength(date, time, 0, nullptr);
- auto result = unique_ptr<char[]>(new char[len]);
+ auto result = make_unsafe_array<char>(len);
  format.FormatString(date, time, result.get());
  return string(result.get(), len);
  }

package/src/duckdb/src/function/scalar/string/concat.cpp

@@ -118,7 +118,7 @@ static void TemplatedConcatWS(DataChunk &args, string_t *sep_data, const Selecti
  const SelectionVector &rsel, idx_t count, Vector &result) {
  vector<idx_t> result_lengths(args.size(), 0);
  vector<bool> has_results(args.size(), false);
- auto orrified_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[args.ColumnCount() - 1]);
+ auto orrified_data = make_unsafe_array<UnifiedVectorFormat>(args.ColumnCount() - 1);
  for (idx_t col_idx = 1; col_idx < args.ColumnCount(); col_idx++) {
  args.data[col_idx].ToUnifiedFormat(args.size(), orrified_data[col_idx - 1]);
  }

package/src/duckdb/src/function/scalar/string/like.cpp

@@ -395,11 +395,11 @@ bool ILikeOperatorFunction(string_t &str, string_t &pattern, char escape = '\0')

  // lowercase both the str and the pattern
  idx_t str_llength = LowerFun::LowerLength(str_data, str_size);
- auto str_ldata = unique_ptr<char[]>(new char[str_llength]);
+ auto str_ldata = make_unsafe_array<char>(str_llength);
  LowerFun::LowerCase(str_data, str_size, str_ldata.get());

  idx_t pat_llength = LowerFun::LowerLength(pat_data, pat_size);
- auto pat_ldata = unique_ptr<char[]>(new char[pat_llength]);
+ auto pat_ldata = make_unsafe_array<char>(pat_llength);
  LowerFun::LowerCase(pat_data, pat_size, pat_ldata.get());
  string_t str_lcase(str_ldata.get(), str_llength);
  string_t pat_lcase(pat_ldata.get(), pat_llength);

package/src/duckdb/src/function/scalar/system/aggregate_export.cpp

@@ -36,12 +36,12 @@ struct ExportAggregateBindData : public FunctionData {
  struct CombineState : public FunctionLocalState {
  idx_t state_size;

- unique_ptr<data_t[]> state_buffer0, state_buffer1;
+ unsafe_array_ptr<data_t> state_buffer0, state_buffer1;
  Vector state_vector0, state_vector1;

  explicit CombineState(idx_t state_size_p)
- : state_size(state_size_p), state_buffer0(unique_ptr<data_t[]>(new data_t[state_size_p])),
- state_buffer1(unique_ptr<data_t[]>(new data_t[state_size_p])),
+ : state_size(state_size_p), state_buffer0(make_unsafe_array<data_t>(state_size_p)),
+ state_buffer1(make_unsafe_array<data_t>(state_size_p)),
  state_vector0(Value::POINTER((uintptr_t)state_buffer0.get())),
  state_vector1(Value::POINTER((uintptr_t)state_buffer1.get())) {
  }

@@ -55,12 +55,12 @@ static unique_ptr<FunctionLocalState> InitCombineState(ExpressionState &state, c

  struct FinalizeState : public FunctionLocalState {
  idx_t state_size;
- unique_ptr<data_t[]> state_buffer;
+ unsafe_array_ptr<data_t> state_buffer;
  Vector addresses;

  explicit FinalizeState(idx_t state_size_p)
  : state_size(state_size_p),
- state_buffer(unique_ptr<data_t[]>(new data_t[STANDARD_VECTOR_SIZE * AlignValue(state_size_p)])),
+ state_buffer(make_unsafe_array<data_t>(STANDARD_VECTOR_SIZE * AlignValue(state_size_p))),
  addresses(LogicalType::POINTER) {
  }
  };

package/src/duckdb/src/function/table/copy_csv.cpp

@@ -78,7 +78,7 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &i
  bind_data->is_simple = bind_data->options.delimiter.size() == 1 && bind_data->options.escape.size() == 1 &&
  bind_data->options.quote.size() == 1;
  if (bind_data->is_simple) {
- bind_data->requires_quotes = unique_ptr<bool[]>(new bool[256]);
+ bind_data->requires_quotes = make_unsafe_array<bool>(256);
  memset(bind_data->requires_quotes.get(), 0, sizeof(bool) * 256);
  bind_data->requires_quotes['\n'] = true;
  bind_data->requires_quotes['\r'] = true;