duckdb 0.7.2-dev3441.0 → 0.7.2-dev3546.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/configure.py +2 -0
  2. package/package.json +2 -2
  3. package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
  4. package/src/duckdb/extension/json/json_functions/read_json.cpp +1 -0
  5. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +42 -0
  6. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -0
  7. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  8. package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
  9. package/src/duckdb/src/common/arrow/arrow_converter.cpp +4 -4
  10. package/src/duckdb/src/common/compressed_file_system.cpp +2 -2
  11. package/src/duckdb/src/common/constants.cpp +1 -0
  12. package/src/duckdb/src/common/file_system.cpp +2 -2
  13. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -2
  14. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +1 -1
  15. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +1 -1
  16. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  17. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +4 -3
  18. package/src/duckdb/src/common/serializer.cpp +1 -1
  19. package/src/duckdb/src/common/sort/radix_sort.cpp +5 -5
  20. package/src/duckdb/src/common/string_util.cpp +2 -2
  21. package/src/duckdb/src/common/types/bit.cpp +2 -2
  22. package/src/duckdb/src/common/types/blob.cpp +2 -2
  23. package/src/duckdb/src/common/types/data_chunk.cpp +2 -2
  24. package/src/duckdb/src/common/types/date.cpp +1 -1
  25. package/src/duckdb/src/common/types/decimal.cpp +2 -2
  26. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +14 -2
  27. package/src/duckdb/src/common/types/selection_vector.cpp +1 -1
  28. package/src/duckdb/src/common/types/time.cpp +1 -1
  29. package/src/duckdb/src/common/types/vector.cpp +10 -10
  30. package/src/duckdb/src/common/types/vector_buffer.cpp +11 -3
  31. package/src/duckdb/src/common/types/vector_cache.cpp +5 -5
  32. package/src/duckdb/src/common/virtual_file_system.cpp +4 -0
  33. package/src/duckdb/src/common/windows_util.cpp +2 -2
  34. package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp +6 -3
  35. package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp +2 -5
  36. package/src/duckdb/src/core_functions/scalar/string/printf.cpp +1 -1
  37. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  38. package/src/duckdb/src/execution/join_hashtable.cpp +3 -3
  39. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +2 -2
  40. package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp +1 -1
  41. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +1 -1
  42. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +1 -1
  43. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -7
  44. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +4 -41
  45. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +158 -0
  46. package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp +1 -1
  47. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -2
  48. package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp +2 -2
  49. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +3 -4
  50. package/src/duckdb/src/execution/window_segment_tree.cpp +1 -1
  51. package/src/duckdb/src/function/macro_function.cpp +43 -0
  52. package/src/duckdb/src/function/pragma/pragma_queries.cpp +1 -1
  53. package/src/duckdb/src/function/scalar/strftime_format.cpp +2 -1
  54. package/src/duckdb/src/function/scalar/string/concat.cpp +1 -1
  55. package/src/duckdb/src/function/scalar/string/like.cpp +2 -2
  56. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -5
  57. package/src/duckdb/src/function/scalar_macro_function.cpp +10 -0
  58. package/src/duckdb/src/function/table/copy_csv.cpp +3 -7
  59. package/src/duckdb/src/function/table/read_csv.cpp +60 -35
  60. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  61. package/src/duckdb/src/function/table_macro_function.cpp +10 -0
  62. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -1
  63. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp +0 -6
  64. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp +0 -6
  65. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  66. package/src/duckdb/src/include/duckdb/common/compressed_file_system.hpp +3 -3
  67. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -5
  68. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +3 -3
  69. package/src/duckdb/src/include/duckdb/common/helper.hpp +22 -9
  70. package/src/duckdb/src/include/duckdb/common/memory_safety.hpp +15 -0
  71. package/src/duckdb/src/include/duckdb/common/optional_ptr.hpp +1 -0
  72. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +1 -1
  73. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/common/serializer/buffered_serializer.hpp +3 -2
  75. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -3
  76. package/src/duckdb/src/include/duckdb/common/sort/duckdb_pdqsort.hpp +11 -6
  77. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +2 -1
  78. package/src/duckdb/src/include/duckdb/common/types/selection_vector.hpp +1 -1
  79. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +3 -3
  80. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +16 -6
  81. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +53 -22
  82. package/src/duckdb/src/include/duckdb/common/vector.hpp +5 -2
  83. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +4 -4
  85. package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +1 -1
  86. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +1 -1
  87. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_range_join.hpp +1 -1
  88. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +0 -2
  89. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +2 -2
  90. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +27 -127
  91. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +1 -2
  92. package/src/duckdb/src/include/duckdb/execution/operator/persistent/parallel_csv_reader.hpp +2 -2
  93. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +1 -1
  94. package/src/duckdb/src/include/duckdb/execution/perfect_aggregate_hashtable.hpp +2 -2
  95. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +7 -1
  97. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +3 -4
  98. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +7 -2
  99. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +2 -4
  100. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +5 -0
  101. package/src/duckdb/src/include/duckdb/optimizer/join_order/join_relation.hpp +3 -3
  102. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +4 -7
  103. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +8 -12
  104. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +6 -20
  105. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +6 -18
  106. package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +4 -8
  107. package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +4 -38
  108. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +5 -2
  109. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -10
  110. package/src/duckdb/src/include/duckdb/parser/statement/insert_statement.hpp +5 -0
  111. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +2 -2
  112. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +9 -2
  113. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +1 -1
  114. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +1 -1
  115. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +1 -1
  116. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +2 -2
  117. package/src/duckdb/src/optimizer/join_order/join_relation_set.cpp +5 -5
  118. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +14 -6
  119. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +42 -0
  120. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +0 -7
  121. package/src/duckdb/src/parser/parsed_data/create_info.cpp +19 -8
  122. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +46 -0
  123. package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +56 -0
  124. package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +47 -0
  125. package/src/duckdb/src/parser/parsed_data/detach_info.cpp +34 -0
  126. package/src/duckdb/src/parser/parsed_data/drop_info.cpp +46 -0
  127. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +24 -0
  128. package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +37 -0
  129. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -1
  130. package/src/duckdb/src/parser/transform/statement/transform_insert.cpp +10 -0
  131. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +27 -9
  132. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -1
  133. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
  134. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -7
  135. package/src/duckdb/src/planner/logical_operator.cpp +1 -2
  136. package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -25
  137. package/src/duckdb/src/planner/operator/logical_insert.cpp +30 -0
  138. package/src/duckdb/src/planner/operator/logical_simple.cpp +33 -5
  139. package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +6 -16
  140. package/src/duckdb/src/planner/planner.cpp +4 -13
  141. package/src/duckdb/src/storage/arena_allocator.cpp +1 -1
  142. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -11
  143. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +1 -1
  144. package/src/duckdb/src/storage/checkpoint_manager.cpp +12 -6
  145. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -2
  146. package/src/duckdb/src/storage/statistics/list_stats.cpp +1 -1
  147. package/src/duckdb/src/storage/statistics/struct_stats.cpp +1 -1
  148. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  149. package/src/duckdb/src/storage/table/row_group.cpp +2 -2
  150. package/src/duckdb/src/storage/table/update_segment.cpp +7 -6
  151. package/src/duckdb/third_party/fsst/libfsst.cpp +1 -2
  152. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +9 -0
  153. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +13 -12
  154. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +2 -1
  155. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12537 -12415
  156. package/src/duckdb/ub_src_catalog_catalog_entry.cpp +1 -1
  157. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  158. package/src/duckdb/ub_src_parser_parsed_data.cpp +16 -0
  159. package/src/statement.cpp +15 -13
  160. package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +0 -104
package/src/duckdb/src/common/virtual_file_system.cpp
@@ -15,6 +15,10 @@ unique_ptr<FileHandle> VirtualFileSystem::OpenFile(const string &path, uint8_t f
 	if (compression == FileCompressionType::AUTO_DETECT) {
 		// auto detect compression settings based on file name
 		auto lower_path = StringUtil::Lower(path);
+		if (StringUtil::EndsWith(lower_path, ".tmp")) {
+			// strip .tmp
+			lower_path = lower_path.substr(0, lower_path.length() - 4);
+		}
 		if (StringUtil::EndsWith(lower_path, ".gz")) {
 			compression = FileCompressionType::GZIP;
 		} else if (StringUtil::EndsWith(lower_path, ".zst")) {
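The new branch makes auto-detection work for in-progress temporary files: a name like data.csv.gz.tmp now resolves to GZIP instead of falling through as uncompressed. A minimal standalone sketch of the same suffix logic (plain C++ with illustrative names, not DuckDB's API):

#include <string>

// Illustrative stand-in for FileCompressionType
enum class Compression { NONE, GZIP, ZSTD };

static bool EndsWith(const std::string &s, const std::string &suffix) {
	return s.size() >= suffix.size() && s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

Compression DetectCompression(std::string path) {
	// strip a trailing ".tmp" first, so "data.csv.gz.tmp" still detects as GZIP
	if (EndsWith(path, ".tmp")) {
		path = path.substr(0, path.size() - 4);
	}
	if (EndsWith(path, ".gz")) {
		return Compression::GZIP;
	}
	if (EndsWith(path, ".zst")) {
		return Compression::ZSTD;
	}
	return Compression::NONE;
}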
package/src/duckdb/src/common/windows_util.cpp
@@ -11,7 +11,7 @@ std::wstring WindowsUtil::UTF8ToUnicode(const char *input) {
 	if (result_size == 0) {
 		throw IOException("Failure in MultiByteToWideChar");
 	}
-	auto buffer = unique_ptr<wchar_t[]>(new wchar_t[result_size]);
+	auto buffer = make_unsafe_array<wchar_t>(result_size);
 	result_size = MultiByteToWideChar(CP_UTF8, 0, input, -1, buffer.get(), result_size);
 	if (result_size == 0) {
 		throw IOException("Failure in MultiByteToWideChar");
@@ -26,7 +26,7 @@ static string WideCharToMultiByteWrapper(LPCWSTR input, uint32_t code_page) {
 	if (result_size == 0) {
 		throw IOException("Failure in WideCharToMultiByte");
 	}
-	auto buffer = unique_ptr<char[]>(new char[result_size]);
+	auto buffer = make_unsafe_array<char>(result_size);
 	result_size = WideCharToMultiByte(code_page, 0, input, -1, buffer.get(), result_size, 0, 0);
 	if (result_size == 0) {
 		throw IOException("Failure in WideCharToMultiByte");
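Most of the mechanical changes in this diff follow the same pattern: unique_ptr<T[]>(new T[n]) becomes make_unsafe_array<T>(n), and member types of the form unique_ptr<T[]> become unsafe_array_ptr<T> (see the duckdb/common/unique_ptr.hpp and memory_safety.hpp entries in the file list). A rough sketch of what such a helper can look like; the alias and body here are assumptions, not DuckDB's exact definitions:

#include <cstddef>
#include <memory>

// Assumption: unsafe_array_ptr<T> is an owning array pointer that skips the
// extra checks a "safe" wrapper might perform; modeled here as unique_ptr<T[]>.
template <class T>
using unsafe_array_ptr = std::unique_ptr<T[]>;

// Factory mirroring make_unique for arrays, but without value-initialization:
// new T[n] leaves trivially-constructible elements uninitialized, which is why
// call sites in this diff still memset buffers they need zeroed.
template <class T>
unsafe_array_ptr<T> make_unsafe_array(std::size_t n) {
	return unsafe_array_ptr<T>(new T[n]);
}

The "unsafe" prefix signals that callers opt out of zero-initialization, which is presumably why several hunks below keep their explicit memset after allocation.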
package/src/duckdb/src/core_functions/aggregate/distributive/string_agg.cpp
@@ -131,18 +131,21 @@ unique_ptr<FunctionData> StringAggBind(ClientContext &context, AggregateFunction
 		throw BinderException("Separator argument to StringAgg must be a constant");
 	}
 	auto separator_val = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
+	string separator_string = ",";
 	if (separator_val.IsNull()) {
 		arguments[0] = make_uniq<BoundConstantExpression>(Value(LogicalType::VARCHAR));
+	} else {
+		separator_string = separator_val.ToString();
 	}
 	Function::EraseArgument(function, arguments, arguments.size() - 1);
-	return make_uniq<StringAggBindData>(separator_val.ToString());
+	return make_uniq<StringAggBindData>(std::move(separator_string));
 }
 
 static void StringAggSerialize(FieldWriter &writer, const FunctionData *bind_data_p,
                                const AggregateFunction &function) {
 	D_ASSERT(bind_data_p);
-	auto bind_data = (StringAggBindData *)bind_data_p;
-	writer.WriteString(bind_data->sep);
+	auto &bind_data = bind_data_p->Cast<StringAggBindData>();
+	writer.WriteString(bind_data.sep);
 }
 
 unique_ptr<FunctionData> StringAggDeserialize(ClientContext &context, FieldReader &reader,
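Besides the NULL-separator fix, this hunk swaps a C-style pointer cast for DuckDB's Cast<T>() helper. A hedged sketch of the general pattern (a checked downcast member on a polymorphic base; the details are illustrative, not DuckDB's exact code):

#include <cassert>
#include <string>

struct FunctionData {
	virtual ~FunctionData() = default;

	// Checked downcast: in debug builds an assertion (or dynamic_cast)
	// can verify the dynamic type before the static_cast.
	template <class TARGET>
	const TARGET &Cast() const {
		assert(dynamic_cast<const TARGET *>(this) != nullptr);
		return static_cast<const TARGET &>(*this);
	}
};

struct StringAggBindData : FunctionData {
	std::string sep;
};

With this shape, a call site reads auto &bind_data = bind_data_p->Cast<StringAggBindData>(); and members are then accessed with "." instead of "->", as in the hunk above.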
package/src/duckdb/src/core_functions/scalar/list/list_aggregates.cpp
@@ -179,7 +179,7 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
 
 	// state_buffer holds the state for each list of this chunk
 	idx_t size = aggr.function.state_size();
-	auto state_buffer = unique_ptr<data_t[]>(new data_t[size * count]);
+	auto state_buffer = make_unsafe_array<data_t>(size * count);
 
 	// state vector for initialize and finalize
 	StateVector state_vector(count, info.aggr_expr->Copy());
@@ -344,19 +344,16 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
 }
 
 static void ListAggregateFunction(DataChunk &args, ExpressionState &state, Vector &result) {
-
-	D_ASSERT(args.ColumnCount() == 2);
+	D_ASSERT(args.ColumnCount() >= 2);
 	ListAggregatesFunction<AggregateFunctor, true>(args, state, result);
 }
 
 static void ListDistinctFunction(DataChunk &args, ExpressionState &state, Vector &result) {
-
 	D_ASSERT(args.ColumnCount() == 1);
 	ListAggregatesFunction<DistinctFunctor>(args, state, result);
 }
 
 static void ListUniqueFunction(DataChunk &args, ExpressionState &state, Vector &result) {
-
 	D_ASSERT(args.ColumnCount() == 1);
 	ListAggregatesFunction<UniqueFunctor>(args, state, result);
 }
package/src/duckdb/src/core_functions/scalar/string/printf.cpp
@@ -94,7 +94,7 @@ static void PrintfFunction(DataChunk &args, ExpressionState &state, Vector &resu
 
 	// now gather all the format arguments
 	vector<duckdb_fmt::basic_format_arg<CTX>> format_args;
-	vector<unique_ptr<data_t[]>> string_args;
+	vector<unsafe_array_ptr<data_t>> string_args;
 
 	for (idx_t col_idx = 1; col_idx < args.ColumnCount(); col_idx++) {
 		auto &col = args.data[col_idx];
package/src/duckdb/src/execution/aggregate_hashtable.cpp
@@ -390,7 +390,7 @@ idx_t GroupedAggregateHashTable::FindOrCreateGroupsInternal(AggregateHTAppendSta
 	}
 	TupleDataCollection::ToUnifiedFormat(state.chunk_state, state.group_chunk);
 	if (!state.group_data) {
-		state.group_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[state.group_chunk.ColumnCount()]);
+		state.group_data = make_unsafe_array<UnifiedVectorFormat>(state.group_chunk.ColumnCount());
 	}
 	TupleDataCollection::GetVectorData(state.chunk_state, state.group_data.get());
 
package/src/duckdb/src/execution/join_hashtable.cpp
@@ -142,7 +142,7 @@ static idx_t FilterNullValues(UnifiedVectorFormat &vdata, const SelectionVector
 	return result_count;
 }
 
-idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unique_ptr<UnifiedVectorFormat[]> &key_data,
+idx_t JoinHashTable::PrepareKeys(DataChunk &keys, unsafe_array_ptr<UnifiedVectorFormat> &key_data,
                                  const SelectionVector *&current_sel, SelectionVector &sel, bool build_side) {
 	key_data = keys.ToUnifiedFormat();
 
@@ -197,7 +197,7 @@ void JoinHashTable::Build(PartitionedTupleDataAppendState &append_state, DataChu
 	}
 
 	// prepare the keys for processing
-	unique_ptr<UnifiedVectorFormat[]> key_data;
+	unsafe_array_ptr<UnifiedVectorFormat> key_data;
 	const SelectionVector *current_sel;
 	SelectionVector sel(STANDARD_VECTOR_SIZE);
 	idx_t added_count = PrepareKeys(keys, key_data, current_sel, sel, true);
@@ -332,7 +332,7 @@ unique_ptr<ScanStructure> JoinHashTable::InitializeScanStructure(DataChunk &keys
 	auto ss = make_uniq<ScanStructure>(*this);
 
 	if (join_type != JoinType::INNER) {
-		ss->found_match = unique_ptr<bool[]>(new bool[STANDARD_VECTOR_SIZE]);
+		ss->found_match = make_unsafe_array<bool>(STANDARD_VECTOR_SIZE);
 		memset(ss->found_match.get(), 0, sizeof(bool) * STANDARD_VECTOR_SIZE);
 	}
 
package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp
@@ -39,7 +39,7 @@ struct AggregateState {
 		for (auto &aggregate : aggregate_expressions) {
 			D_ASSERT(aggregate->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
 			auto &aggr = aggregate->Cast<BoundAggregateExpression>();
-			auto state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
+			auto state = make_unsafe_array<data_t>(aggr.function.state_size());
 			aggr.function.initialize(state.get());
 			aggregates.push_back(std::move(state));
 			bind_data.push_back(aggr.bind_info.get());
@@ -69,7 +69,7 @@ struct AggregateState {
 	}
 
 	//! The aggregate values
-	vector<unique_ptr<data_t[]>> aggregates;
+	vector<unsafe_array_ptr<data_t>> aggregates;
 	//! The bind data
 	vector<FunctionData *> bind_data;
 	//! The destructors
package/src/duckdb/src/execution/operator/join/outer_join_marker.cpp
@@ -10,7 +10,7 @@ void OuterJoinMarker::Initialize(idx_t count_p) {
 		return;
 	}
 	this->count = count_p;
-	found_match = unique_ptr<bool[]>(new bool[count]);
+	found_match = make_unsafe_array<bool>(count);
 	Reset();
 }
 
package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp
@@ -25,7 +25,7 @@ bool PerfectHashJoinExecutor::BuildPerfectHashTable(LogicalType &key_type) {
 	}
 
 	// and for duplicate_checking
-	bitmap_build_idx = unique_ptr<bool[]>(new bool[build_size]);
+	bitmap_build_idx = make_unsafe_array<bool>(build_size);
 	memset(bitmap_build_idx.get(), 0, sizeof(bool) * build_size); // set false
 
 	// Now fill columns with build data
package/src/duckdb/src/execution/operator/join/physical_range_join.cpp
@@ -72,7 +72,7 @@ void PhysicalRangeJoin::GlobalSortedTable::Combine(LocalSortedTable &ltable) {
 }
 
 void PhysicalRangeJoin::GlobalSortedTable::IntializeMatches() {
-	found_match = unique_ptr<bool[]>(new bool[Count()]);
+	found_match = make_unsafe_array<bool>(Count());
 	memset(found_match.get(), 0, sizeof(bool) * Count());
 }
 
package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp
@@ -34,7 +34,7 @@ string BaseCSVReader::GetLineNumberStr(idx_t line_error, bool is_line_estimated,
 
 BaseCSVReader::BaseCSVReader(ClientContext &context_p, BufferedCSVReaderOptions options_p,
                              const vector<LogicalType> &requested_types)
-    : context(context_p), fs(FileSystem::GetFileSystem(context)), allocator(Allocator::Get(context)),
+    : context(context_p), fs(FileSystem::GetFileSystem(context)), allocator(BufferAllocator::Get(context)),
       options(std::move(options_p)) {
 }
 
@@ -42,12 +42,7 @@ BaseCSVReader::~BaseCSVReader() {
 }
 
 unique_ptr<CSVFileHandle> BaseCSVReader::OpenCSV(const BufferedCSVReaderOptions &options_p) {
-	auto file_handle = fs.OpenFile(options_p.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
-	                               options_p.compression);
-	if (file_handle->CanSeek()) {
-		file_handle->Reset();
-	}
-	return make_uniq<CSVFileHandle>(std::move(file_handle));
+	return CSVFileHandle::OpenFile(fs, allocator, options_p.file_path, options_p.compression, true);
 }
 
 void BaseCSVReader::InitParseChunk(idx_t num_cols) {
package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp
@@ -239,18 +239,13 @@ void BufferedCSVReader::Initialize(const vector<LogicalType> &requested_types) {
 		if (return_types.empty()) {
 			throw InvalidInputException("Failed to detect column types from CSV: is the file a valid CSV file?");
 		}
-		if (cached_chunks.empty()) {
-			JumpToBeginning(options.skip_rows, options.header);
-		}
+		JumpToBeginning(options.skip_rows, options.header);
 	} else {
 		return_types = requested_types;
 		ResetBuffer();
 		SkipRowsAndReadHeader(options.skip_rows, options.header);
 	}
 	InitParseChunk(return_types.size());
-	// we only need reset support during the automatic CSV type detection
-	// since reset support might require caching (in the case of streams), we disable it for the remainder
-	file_handle->DisableReset();
 }
 
 void BufferedCSVReader::ResetBuffer() {
@@ -262,13 +257,7 @@ void BufferedCSVReader::ResetBuffer() {
 }
 
 void BufferedCSVReader::ResetStream() {
-	if (!file_handle->CanSeek()) {
-		// seeking to the beginning appears to not be supported in all compiler/os-scenarios,
-		// so we have to create a new stream source here for now
-		file_handle->Reset();
-	} else {
-		file_handle->Seek(0);
-	}
+	file_handle->Reset();
 	linenr = 0;
 	linenr_estimated = false;
 	bytes_per_line_avg = 0;
@@ -332,7 +321,7 @@ bool BufferedCSVReader::JumpToNextSample() {
 
 	// if we deal with any other sources than plaintext files, jumping_samples can be tricky. In that case
 	// we just read x continuous chunks from the stream TODO: make jumps possible for zipfiles.
-	if (!file_handle->PlainFileSource() || !jumping_samples) {
+	if (!file_handle->OnDiskFile() || !jumping_samples) {
 		sample_chunk_idx++;
 		return true;
 	}
@@ -802,21 +791,6 @@ vector<LogicalType> BufferedCSVReader::RefineTypeDetection(const vector<LogicalT
 				}
 			}
 		}
-
-		if (!jumping_samples) {
-			if ((sample_chunk_idx)*options.sample_chunk_size <= options.buffer_size) {
-				// cache parse chunk
-				// create a new chunk and fill it with the remainder
-				auto chunk = make_uniq<DataChunk>();
-				auto parse_chunk_types = parse_chunk.GetTypes();
-				chunk->Move(parse_chunk);
-				cached_chunks.push(std::move(chunk));
-			} else {
-				while (!cached_chunks.empty()) {
-					cached_chunks.pop();
-				}
-			}
-		}
 	}
 
 	// set sql types
@@ -1445,7 +1419,7 @@ bool BufferedCSVReader::ReadBuffer(idx_t &start, idx_t &line_start) {
 		                    GetLineNumberStr(linenr, linenr_estimated));
 	}
 
-	buffer = unique_ptr<char[]>(new char[buffer_read_size + remaining + 1]);
+	buffer = make_unsafe_array<char>(buffer_read_size + remaining + 1);
 	buffer_size = remaining + buffer_read_size;
 	if (remaining > 0) {
 		// remaining from last buffer: copy it here
@@ -1474,17 +1448,6 @@
 }
 
 void BufferedCSVReader::ParseCSV(DataChunk &insert_chunk) {
-	// if no auto-detect or auto-detect with jumping samples, we have nothing cached and start from the beginning
-	if (cached_chunks.empty()) {
-		cached_buffers.clear();
-	} else {
-		auto &chunk = cached_chunks.front();
-		parse_chunk.Move(*chunk);
-		cached_chunks.pop();
-		Flush(insert_chunk);
-		return;
-	}
-
 	string error_message;
 	if (!TryParseCSV(ParserMode::PARSING, insert_chunk, error_message)) {
 		throw InvalidInputException(error_message);
package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp (new file)
@@ -0,0 +1,158 @@
+#include "duckdb/execution/operator/persistent/csv_file_handle.hpp"
+
+namespace duckdb {
+
+CSVFileHandle::CSVFileHandle(FileSystem &fs, Allocator &allocator, unique_ptr<FileHandle> file_handle_p,
+                             const string &path_p, FileCompressionType compression, bool enable_reset)
+    : fs(fs), allocator(allocator), file_handle(std::move(file_handle_p)), path(path_p), compression(compression),
+      reset_enabled(enable_reset) {
+	can_seek = file_handle->CanSeek();
+	on_disk_file = file_handle->OnDiskFile();
+	file_size = file_handle->GetFileSize();
+}
+
+unique_ptr<FileHandle> CSVFileHandle::OpenFileHandle(FileSystem &fs, Allocator &allocator, const string &path,
+                                                     FileCompressionType compression) {
+	auto file_handle = fs.OpenFile(path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK, compression);
+	if (file_handle->CanSeek()) {
+		file_handle->Reset();
+	}
+	return file_handle;
+}
+
+unique_ptr<CSVFileHandle> CSVFileHandle::OpenFile(FileSystem &fs, Allocator &allocator, const string &path,
+                                                  FileCompressionType compression, bool enable_reset) {
+	auto file_handle = CSVFileHandle::OpenFileHandle(fs, allocator, path, compression);
+	return make_uniq<CSVFileHandle>(fs, allocator, std::move(file_handle), path, compression, enable_reset);
+}
+
+bool CSVFileHandle::CanSeek() {
+	return can_seek;
+}
+
+void CSVFileHandle::Seek(idx_t position) {
+	if (!can_seek) {
+		throw InternalException("Cannot seek in this file");
+	}
+	file_handle->Seek(position);
+}
+
+idx_t CSVFileHandle::SeekPosition() {
+	if (!can_seek) {
+		throw InternalException("Cannot seek in this file");
+	}
+	return file_handle->SeekPosition();
+}
+
+void CSVFileHandle::Reset() {
+	requested_bytes = 0;
+	read_position = 0;
+	if (can_seek) {
+		// we can seek - reset the file handle
+		file_handle->Reset();
+	} else if (on_disk_file) {
+		// we cannot seek but it is an on-disk file - re-open the file
+		file_handle = CSVFileHandle::OpenFileHandle(fs, allocator, path, compression);
+	} else {
+		if (!reset_enabled) {
+			throw InternalException("Reset called but reset is not enabled for this CSV Handle");
+		}
+		read_position = 0;
+	}
+}
+bool CSVFileHandle::OnDiskFile() {
+	return on_disk_file;
+}
+
+idx_t CSVFileHandle::FileSize() {
+	return file_size;
+}
+
+bool CSVFileHandle::FinishedReading() {
+	return requested_bytes >= file_size;
+}
+
+idx_t CSVFileHandle::Read(void *buffer, idx_t nr_bytes) {
+	requested_bytes += nr_bytes;
+	if (on_disk_file || can_seek) {
+		// if this is a plain file source OR we can seek we are not caching anything
+		return file_handle->Read(buffer, nr_bytes);
+	}
+	// not a plain file source: we need to do some bookkeeping around the reset functionality
+	idx_t result_offset = 0;
+	if (read_position < buffer_size) {
+		// we need to read from our cached buffer
+		auto buffer_read_count = MinValue<idx_t>(nr_bytes, buffer_size - read_position);
+		memcpy(buffer, cached_buffer.get() + read_position, buffer_read_count);
+		result_offset += buffer_read_count;
+		read_position += buffer_read_count;
+		if (result_offset == nr_bytes) {
+			return nr_bytes;
+		}
+	} else if (!reset_enabled && cached_buffer.IsSet()) {
+		// reset is disabled, but we still have cached data
+		// we can remove any cached data
+		cached_buffer.Reset();
+		buffer_size = 0;
+		buffer_capacity = 0;
+		read_position = 0;
+	}
+	// we have data left to read from the file
+	// read directly into the buffer
+	auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
+	file_size = file_handle->GetFileSize();
+	read_position += bytes_read;
+	if (reset_enabled) {
+		// if reset caching is enabled, we need to cache the bytes that we have read
+		if (buffer_size + bytes_read >= buffer_capacity) {
+			// no space; first enlarge the buffer
+			buffer_capacity = MaxValue<idx_t>(NextPowerOfTwo(buffer_size + bytes_read), buffer_capacity * 2);
+
+			auto new_buffer = allocator.Allocate(buffer_capacity);
+			if (buffer_size > 0) {
+				memcpy(new_buffer.get(), cached_buffer.get(), buffer_size);
+			}
+			cached_buffer = std::move(new_buffer);
+		}
+		memcpy(cached_buffer.get() + buffer_size, (char *)buffer + result_offset, bytes_read);
+		buffer_size += bytes_read;
+	}
+
+	return result_offset + bytes_read;
+}
+
+string CSVFileHandle::ReadLine() {
+	bool carriage_return = false;
+	string result;
+	char buffer[1];
+	while (true) {
+		idx_t bytes_read = Read(buffer, 1);
+		if (bytes_read == 0) {
+			return result;
+		}
+		if (carriage_return) {
+			if (buffer[0] != '\n') {
+				if (!file_handle->CanSeek()) {
+					throw BinderException(
+					    "Carriage return newlines not supported when reading CSV files in which we cannot seek");
+				}
+				file_handle->Seek(file_handle->SeekPosition() - 1);
+				return result;
+			}
+		}
+		if (buffer[0] == '\n') {
+			return result;
+		}
+		if (buffer[0] != '\r') {
+			result += buffer[0];
+		} else {
+			carriage_return = true;
+		}
+	}
+}
+
+void CSVFileHandle::DisableReset() {
+	this->reset_enabled = false;
+}
+
+} // namespace duckdb
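The interesting part of Read() above is the cache-growth policy for non-seekable streams: capacity grows to the larger of the next power of two covering the data and double the current capacity, which keeps appends amortized O(1). A small worked sketch of that policy in isolation (illustrative; NextPowerOfTwo here is a stand-in for DuckDB's helper):

#include <cstdint>
#include <cstdio>

// Round up to the next power of two (stand-in for DuckDB's helper).
static uint64_t NextPowerOfTwo(uint64_t v) {
	uint64_t result = 1;
	while (result < v) {
		result <<= 1;
	}
	return result;
}

int main() {
	uint64_t capacity = 0, size = 0;
	const uint64_t reads[] = {100, 4000, 100, 60000};
	for (uint64_t bytes_read : reads) {
		if (size + bytes_read >= capacity) {
			// grow to whichever is larger: exact power-of-two fit, or doubling
			uint64_t by_need = NextPowerOfTwo(size + bytes_read);
			uint64_t by_doubling = capacity * 2;
			capacity = by_need > by_doubling ? by_need : by_doubling;
		}
		size += bytes_read;
		printf("size=%llu capacity=%llu\n", (unsigned long long)size, (unsigned long long)capacity);
	}
	// prints capacities 128, 8192, 8192, 65536 for the reads above
}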
package/src/duckdb/src/execution/operator/projection/physical_pivot.cpp
@@ -19,7 +19,7 @@ PhysicalPivot::PhysicalPivot(vector<LogicalType> types_p, unique_ptr<PhysicalOpe
 	for (auto &aggr_expr : bound_pivot.aggregates) {
 		auto &aggr = (BoundAggregateExpression &)*aggr_expr;
 		// for each aggregate, initialize an empty aggregate state and finalize it immediately
-		auto state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
+		auto state = make_unsafe_array<data_t>(aggr.function.state_size());
 		aggr.function.initialize(state.get());
 		Vector state_vector(Value::POINTER((uintptr_t)state.get()));
 		Vector result_vector(aggr_expr->return_type);
package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp
@@ -7,9 +7,9 @@
 
 namespace duckdb {
 
-PhysicalCreateType::PhysicalCreateType(unique_ptr<CreateTypeInfo> info, idx_t estimated_cardinality)
+PhysicalCreateType::PhysicalCreateType(unique_ptr<CreateTypeInfo> info_p, idx_t estimated_cardinality)
     : PhysicalOperator(PhysicalOperatorType::CREATE_TYPE, {LogicalType::BIGINT}, estimated_cardinality),
-      info(std::move(info)) {
+      info(std::move(info_p)) {
 }
 
 //===--------------------------------------------------------------------===//
package/src/duckdb/src/execution/perfect_aggregate_hashtable.cpp
@@ -23,11 +23,11 @@ PerfectAggregateHashTable::PerfectAggregateHashTable(ClientContext &context, All
 	tuple_size = layout.GetRowWidth();
 
 	// allocate and null initialize the data
-	owned_data = unique_ptr<data_t[]>(new data_t[tuple_size * total_groups]);
+	owned_data = make_unsafe_array<data_t>(tuple_size * total_groups);
 	data = owned_data.get();
 
 	// set up the empty payloads for every tuple, and initialize the "occupied" flag to false
-	group_is_set = unique_ptr<bool[]>(new bool[total_groups]);
+	group_is_set = make_unsafe_array<bool>(total_groups);
 	memset(group_is_set.get(), 0, total_groups * sizeof(bool));
 
 	// initialize the hash table for each entry
package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp
@@ -334,7 +334,7 @@ public:
 	//! The current position to scan the HT for output tuples
 	idx_t ht_index;
 	//! The set of aggregate scan states
-	unique_ptr<TupleDataParallelScanState[]> ht_scan_states;
+	unsafe_array_ptr<TupleDataParallelScanState> ht_scan_states;
 	atomic<bool> initialized;
 	atomic<bool> finished;
 };
@@ -404,7 +404,7 @@ SourceResultType RadixPartitionedHashTable::GetData(ExecutionContext &context, D
 	for (idx_t i = 0; i < op.aggregates.size(); i++) {
 		D_ASSERT(op.aggregates[i]->GetExpressionClass() == ExpressionClass::BOUND_AGGREGATE);
 		auto &aggr = op.aggregates[i]->Cast<BoundAggregateExpression>();
-		auto aggr_state = unique_ptr<data_t[]>(new data_t[aggr.function.state_size()]);
+		auto aggr_state = make_unsafe_array<data_t>(aggr.function.state_size());
 		aggr.function.initialize(aggr_state.get());
 
 		AggregateInputData aggr_input_data(aggr.bind_info.get(), Allocator::DefaultAllocator());
@@ -433,8 +433,7 @@
 	lock_guard<mutex> l(state.lock);
 	if (!state.initialized) {
 		auto &finalized_hts = gstate.finalized_hts;
-		state.ht_scan_states =
-		    unique_ptr<TupleDataParallelScanState[]>(new TupleDataParallelScanState[finalized_hts.size()]);
+		state.ht_scan_states = make_unsafe_array<TupleDataParallelScanState>(finalized_hts.size());
 
 		const auto &layout = gstate.finalized_hts[0]->GetDataCollection().GetLayout();
 		vector<column_t> column_ids;
package/src/duckdb/src/execution/window_segment_tree.cpp
@@ -309,7 +309,7 @@ void WindowSegmentTree::ConstructTree() {
 		level_nodes = (level_nodes + (TREE_FANOUT - 1)) / TREE_FANOUT;
 		internal_nodes += level_nodes;
 	} while (level_nodes > 1);
-	levels_flat_native = unique_ptr<data_t[]>(new data_t[internal_nodes * state.size()]);
+	levels_flat_native = make_unsafe_array<data_t>(internal_nodes * state.size());
 	levels_flat_start.push_back(0);
 
 	idx_t levels_flat_offset = 0;
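For context on the sizing loop in this hunk: the internal node count of the segment tree is the sum of repeated ceiling divisions of the leaf count by the fanout, and that total sizes the flat allocation above. A worked example (the fanout constant here is illustrative, not necessarily DuckDB's TREE_FANOUT):

#include <cstdio>

int main() {
	const unsigned FANOUT = 64;    // illustrative; DuckDB's TREE_FANOUT may differ
	unsigned level_nodes = 100000; // number of leaves
	unsigned internal_nodes = 0;
	do {
		// ceiling division: every FANOUT children collapse into one parent node
		level_nodes = (level_nodes + (FANOUT - 1)) / FANOUT;
		internal_nodes += level_nodes;
	} while (level_nodes > 1);
	printf("internal nodes: %u\n", internal_nodes); // 1563 + 25 + 1 = 1589
}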
package/src/duckdb/src/function/macro_function.cpp
@@ -5,6 +5,7 @@
 #include "duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp"
 #include "duckdb/common/string_util.hpp"
 #include "duckdb/function/scalar_macro_function.hpp"
+#include "duckdb/function/table_macro_function.hpp"
 #include "duckdb/parser/expression/columnref_expression.hpp"
 #include "duckdb/parser/expression/comparison_expression.hpp"
 #include "duckdb/parser/expression/function_expression.hpp"
@@ -91,4 +92,46 @@ string MacroFunction::ToSQL(const string &schema, const string &name) const {
 	return StringUtil::Format("CREATE MACRO %s.%s(%s) AS ", schema, name, StringUtil::Join(param_strings, ", "));
 }
 
+void MacroFunction::Serialize(Serializer &main_serializer) const {
+	FieldWriter writer(main_serializer);
+	writer.WriteField(type);
+	writer.WriteSerializableList(parameters);
+	writer.WriteField<uint32_t>((uint32_t)default_parameters.size());
+	auto &serializer = writer.GetSerializer();
+	for (auto &kv : default_parameters) {
+		serializer.WriteString(kv.first);
+		kv.second->Serialize(serializer);
+	}
+	SerializeInternal(writer);
+	writer.Finalize();
+}
+
+unique_ptr<MacroFunction> MacroFunction::Deserialize(Deserializer &main_source) {
+	FieldReader reader(main_source);
+	auto type = reader.ReadRequired<MacroType>();
+	auto parameters = reader.ReadRequiredSerializableList<ParsedExpression>();
+	auto default_param_count = reader.ReadRequired<uint32_t>();
+	unordered_map<string, unique_ptr<ParsedExpression>> default_parameters;
+	auto &source = reader.GetSource();
+	for (idx_t i = 0; i < default_param_count; i++) {
+		auto name = source.Read<string>();
+		default_parameters[name] = ParsedExpression::Deserialize(source);
+	}
+	unique_ptr<MacroFunction> result;
+	switch (type) {
+	case MacroType::SCALAR_MACRO:
+		result = ScalarMacroFunction::Deserialize(reader);
+		break;
+	case MacroType::TABLE_MACRO:
+		result = TableMacroFunction::Deserialize(reader);
+		break;
+	default:
+		throw InternalException("Cannot deserialize macro type");
+	}
+	result->parameters = std::move(parameters);
+	result->default_parameters = std::move(default_parameters);
+	reader.Finalize();
+	return result;
+}
+
 } // namespace duckdb
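The new Serialize/Deserialize pair follows the usual length-prefixed layout: write a count, then each key/value pair in order, and read everything back in exactly the same order. A self-contained sketch of that symmetry using plain streams (illustrative only; DuckDB's FieldWriter/FieldReader add field IDs and validation on top of this idea):

#include <cstdint>
#include <istream>
#include <map>
#include <ostream>
#include <string>

// Write: count first, then each key/value pair in iteration order.
void WriteMap(std::ostream &out, const std::map<std::string, std::string> &m) {
	uint32_t count = (uint32_t)m.size();
	out.write((const char *)&count, sizeof(count));
	for (auto &kv : m) {
		uint32_t klen = (uint32_t)kv.first.size(), vlen = (uint32_t)kv.second.size();
		out.write((const char *)&klen, sizeof(klen));
		out.write(kv.first.data(), klen);
		out.write((const char *)&vlen, sizeof(vlen));
		out.write(kv.second.data(), vlen);
	}
}

// Read: mirror the writes exactly, in the same order.
std::map<std::string, std::string> ReadMap(std::istream &in) {
	uint32_t count;
	in.read((char *)&count, sizeof(count));
	std::map<std::string, std::string> result;
	for (uint32_t i = 0; i < count; i++) {
		uint32_t klen, vlen;
		in.read((char *)&klen, sizeof(klen));
		std::string key(klen, '\0');
		in.read(&key[0], klen);
		in.read((char *)&vlen, sizeof(vlen));
		std::string value(vlen, '\0');
		in.read(&value[0], vlen);
		result[key] = std::move(value);
	}
	return result;
}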
package/src/duckdb/src/function/pragma/pragma_queries.cpp
@@ -139,7 +139,7 @@ string PragmaImportDatabase(ClientContext &context, const FunctionParameters &pa
 		auto handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
 		                          FileSystem::DEFAULT_COMPRESSION);
 		auto fsize = fs.GetFileSize(*handle);
-		auto buffer = unique_ptr<char[]>(new char[fsize]);
+		auto buffer = make_unsafe_array<char>(fsize);
 		fs.Read(*handle, buffer.get(), fsize);
 		auto query = string(buffer.get(), fsize);
 		// Replace the placeholder with the path provided to IMPORT
package/src/duckdb/src/function/scalar/strftime_format.cpp
@@ -408,7 +408,7 @@ string StrfTimeFormat::Format(timestamp_t timestamp, const string &format_str) {
 	auto time = Timestamp::GetTime(timestamp);
 
 	auto len = format.GetLength(date, time, 0, nullptr);
-	auto result = unique_ptr<char[]>(new char[len]);
+	auto result = make_unsafe_array<char>(len);
 	format.FormatString(date, time, result.get());
 	return string(result.get(), len);
 }
@@ -417,6 +417,7 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
 	if (format_string.empty()) {
 		return "Empty format string";
 	}
+	format.format_specifier = format_string;
 	format.specifiers.clear();
 	format.literals.clear();
 	format.numeric_width.clear();
package/src/duckdb/src/function/scalar/string/concat.cpp
@@ -118,7 +118,7 @@ static void TemplatedConcatWS(DataChunk &args, string_t *sep_data, const Selecti
                               const SelectionVector &rsel, idx_t count, Vector &result) {
 	vector<idx_t> result_lengths(args.size(), 0);
 	vector<bool> has_results(args.size(), false);
-	auto orrified_data = unique_ptr<UnifiedVectorFormat[]>(new UnifiedVectorFormat[args.ColumnCount() - 1]);
+	auto orrified_data = make_unsafe_array<UnifiedVectorFormat>(args.ColumnCount() - 1);
 	for (idx_t col_idx = 1; col_idx < args.ColumnCount(); col_idx++) {
 		args.data[col_idx].ToUnifiedFormat(args.size(), orrified_data[col_idx - 1]);
 	}
package/src/duckdb/src/function/scalar/string/like.cpp
@@ -395,11 +395,11 @@ bool ILikeOperatorFunction(string_t &str, string_t &pattern, char escape = '\0')
 
 	// lowercase both the str and the pattern
 	idx_t str_llength = LowerFun::LowerLength(str_data, str_size);
-	auto str_ldata = unique_ptr<char[]>(new char[str_llength]);
+	auto str_ldata = make_unsafe_array<char>(str_llength);
 	LowerFun::LowerCase(str_data, str_size, str_ldata.get());
 
 	idx_t pat_llength = LowerFun::LowerLength(pat_data, pat_size);
-	auto pat_ldata = unique_ptr<char[]>(new char[pat_llength]);
+	auto pat_ldata = make_unsafe_array<char>(pat_llength);
 	LowerFun::LowerCase(pat_data, pat_size, pat_ldata.get());
 	string_t str_lcase(str_ldata.get(), str_llength);
 	string_t pat_lcase(pat_ldata.get(), pat_llength);
  string_t pat_lcase(pat_ldata.get(), pat_llength);