duckdb 1.1.0 → 1.1.1-dev3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. package/binding.gyp +2 -1
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  4. package/src/duckdb/extension/json/include/json_common.hpp +14 -4
  5. package/src/duckdb/extension/json/include/json_executors.hpp +11 -3
  6. package/src/duckdb/extension/json/json_extension.cpp +1 -1
  7. package/src/duckdb/extension/json/json_functions/json_extract.cpp +11 -3
  8. package/src/duckdb/extension/json/json_functions/json_value.cpp +4 -3
  9. package/src/duckdb/extension/json/json_functions.cpp +16 -7
  10. package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
  11. package/src/duckdb/extension/parquet/column_writer.cpp +54 -43
  12. package/src/duckdb/extension/parquet/geo_parquet.cpp +19 -0
  13. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +10 -6
  14. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +3 -3
  15. package/src/duckdb/extension/parquet/parquet_writer.cpp +2 -1
  16. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  17. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +1 -0
  18. package/src/duckdb/src/common/arrow/arrow_util.cpp +60 -0
  19. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +1 -53
  20. package/src/duckdb/src/common/cgroups.cpp +15 -24
  21. package/src/duckdb/src/common/constants.cpp +8 -0
  22. package/src/duckdb/src/common/enum_util.cpp +331 -326
  23. package/src/duckdb/src/common/http_util.cpp +5 -1
  24. package/src/duckdb/src/common/operator/cast_operators.cpp +6 -60
  25. package/src/duckdb/src/common/types/bit.cpp +1 -1
  26. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -1
  27. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +2 -1
  28. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +5 -0
  29. package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +1 -1
  30. package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -1
  31. package/src/duckdb/src/execution/index/art/iterator.cpp +17 -15
  32. package/src/duckdb/src/execution/index/art/prefix.cpp +9 -34
  33. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +4 -3
  34. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +1 -0
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -1
  36. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +2 -2
  37. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +23 -1
  38. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +33 -4
  39. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +23 -13
  40. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +23 -19
  41. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +12 -11
  42. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -14
  43. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +4 -4
  44. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +3 -1
  45. package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -1
  46. package/src/duckdb/src/function/cast/decimal_cast.cpp +33 -3
  47. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +9 -0
  48. package/src/duckdb/src/function/table/arrow.cpp +34 -22
  49. package/src/duckdb/src/function/table/sniff_csv.cpp +4 -1
  50. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  51. package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +31 -0
  52. package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +2 -16
  53. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +60 -0
  54. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -0
  55. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +0 -1
  56. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +2 -1
  57. package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +9 -5
  58. package/src/duckdb/src/include/duckdb/execution/executor.hpp +1 -0
  59. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +5 -2
  60. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +5 -1
  61. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +5 -5
  62. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -0
  63. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -0
  64. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
  66. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +14 -5
  67. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -1
  68. package/src/duckdb/src/include/duckdb/main/settings.hpp +4 -2
  69. package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +3 -0
  70. package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +7 -1
  72. package/src/duckdb/src/include/duckdb/planner/binder.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -1
  75. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -1
  76. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +7 -4
  77. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +2 -2
  78. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +2 -1
  79. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +4 -4
  80. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -4
  81. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +4 -2
  83. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +1 -1
  84. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +1 -0
  85. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -0
  86. package/src/duckdb/src/include/duckdb/transaction/transaction_manager.hpp +1 -1
  87. package/src/duckdb/src/include/duckdb.h +8 -8
  88. package/src/duckdb/src/main/appender.cpp +1 -1
  89. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +3 -3
  90. package/src/duckdb/src/main/capi/helper-c.cpp +4 -0
  91. package/src/duckdb/src/main/config.cpp +24 -11
  92. package/src/duckdb/src/main/database.cpp +6 -5
  93. package/src/duckdb/src/main/extension/extension_install.cpp +13 -8
  94. package/src/duckdb/src/main/extension/extension_load.cpp +10 -4
  95. package/src/duckdb/src/main/extension.cpp +1 -1
  96. package/src/duckdb/src/optimizer/filter_pushdown.cpp +10 -1
  97. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +9 -5
  98. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +14 -8
  99. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -0
  100. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -0
  101. package/src/duckdb/src/optimizer/optimizer.cpp +4 -1
  102. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -11
  103. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +1 -7
  104. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
  105. package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +3 -0
  106. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
  107. package/src/duckdb/src/parser/keyword_helper.cpp +4 -0
  108. package/src/duckdb/src/parser/parser.cpp +20 -18
  109. package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +8 -3
  110. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -0
  111. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +7 -1
  112. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +13 -0
  113. package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -11
  114. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +27 -10
  115. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +24 -9
  116. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -3
  117. package/src/duckdb/src/planner/binder.cpp +5 -6
  118. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +1 -0
  119. package/src/duckdb/src/planner/expression_binder/select_binder.cpp +9 -0
  120. package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +2 -2
  121. package/src/duckdb/src/planner/operator/logical_positional_join.cpp +1 -0
  122. package/src/duckdb/src/storage/buffer/block_handle.cpp +18 -21
  123. package/src/duckdb/src/storage/buffer/block_manager.cpp +12 -4
  124. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
  125. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +12 -2
  126. package/src/duckdb/src/storage/buffer_manager.cpp +3 -2
  127. package/src/duckdb/src/storage/compression/rle.cpp +5 -2
  128. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -1
  129. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +8 -7
  130. package/src/duckdb/src/storage/standard_buffer_manager.cpp +19 -20
  131. package/src/duckdb/src/storage/statistics/column_statistics.cpp +1 -2
  132. package/src/duckdb/src/storage/table/column_data.cpp +5 -2
  133. package/src/duckdb/src/storage/table/column_segment.cpp +2 -2
  134. package/src/duckdb/src/storage/table/row_group_collection.cpp +18 -14
  135. package/src/duckdb/src/storage/table/standard_column_data.cpp +3 -3
  136. package/src/duckdb/src/storage/wal_replay.cpp +2 -3
  137. package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -0
  138. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
  139. package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -2
  140. package/src/duckdb/third_party/libpg_query/include/pg_simplified_token.hpp +6 -4
  141. package/src/duckdb/third_party/libpg_query/include/postgres_parser.hpp +1 -1
  142. package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +1 -1
  143. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +801 -799
  144. package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +6 -2
  145. package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +0 -1
  146. package/src/duckdb/ub_src_common_arrow.cpp +2 -0
  147. package/vendor.py +1 -2
@@ -6,7 +6,11 @@
6
6
  namespace duckdb {
7
7
 
8
8
  void HTTPUtil::ParseHTTPProxyHost(string &proxy_value, string &hostname_out, idx_t &port_out, idx_t default_port) {
9
- auto proxy_split = StringUtil::Split(proxy_value, ":");
9
+ auto sanitized_proxy_value = proxy_value;
10
+ if (StringUtil::StartsWith(proxy_value, "http://")) {
11
+ sanitized_proxy_value = proxy_value.substr(7);
12
+ }
13
+ auto proxy_split = StringUtil::Split(sanitized_proxy_value, ":");
10
14
  if (proxy_split.size() == 1) {
11
15
  hostname_out = proxy_split[0];
12
16
  port_out = default_port;
@@ -920,68 +920,12 @@ bool TryCast::Operation(double input, double &result, bool strict) {
920
920
  //===--------------------------------------------------------------------===//
921
921
  // Cast String -> Numeric
922
922
  //===--------------------------------------------------------------------===//
923
+
923
924
  template <>
924
925
  bool TryCast::Operation(string_t input, bool &result, bool strict) {
925
- auto input_data = reinterpret_cast<const uint8_t *>(input.GetData());
926
+ auto input_data = reinterpret_cast<const char *>(input.GetData());
926
927
  auto input_size = input.GetSize();
927
-
928
- switch (input_size) {
929
- case 1: {
930
- unsigned char c = UnsafeNumericCast<uint8_t>(std::tolower(*input_data));
931
- if (c == 't' || (!strict && c == 'y') || (!strict && c == '1')) {
932
- result = true;
933
- return true;
934
- } else if (c == 'f' || (!strict && c == 'n') || (!strict && c == '0')) {
935
- result = false;
936
- return true;
937
- }
938
- return false;
939
- }
940
- case 2: {
941
- unsigned char n = UnsafeNumericCast<uint8_t>(std::tolower(input_data[0]));
942
- unsigned char o = UnsafeNumericCast<uint8_t>(std::tolower(input_data[1]));
943
- if (n == 'n' && o == 'o') {
944
- result = false;
945
- return true;
946
- }
947
- return false;
948
- }
949
- case 3: {
950
- unsigned char y = UnsafeNumericCast<uint8_t>(std::tolower(input_data[0]));
951
- unsigned char e = UnsafeNumericCast<uint8_t>(std::tolower(input_data[1]));
952
- unsigned char s = UnsafeNumericCast<uint8_t>(std::tolower(input_data[2]));
953
- if (y == 'y' && e == 'e' && s == 's') {
954
- result = true;
955
- return true;
956
- }
957
- return false;
958
- }
959
- case 4: {
960
- unsigned char t = UnsafeNumericCast<uint8_t>(std::tolower(input_data[0]));
961
- unsigned char r = UnsafeNumericCast<uint8_t>(std::tolower(input_data[1]));
962
- unsigned char u = UnsafeNumericCast<uint8_t>(std::tolower(input_data[2]));
963
- unsigned char e = UnsafeNumericCast<uint8_t>(std::tolower(input_data[3]));
964
- if (t == 't' && r == 'r' && u == 'u' && e == 'e') {
965
- result = true;
966
- return true;
967
- }
968
- return false;
969
- }
970
- case 5: {
971
- unsigned char f = UnsafeNumericCast<uint8_t>(std::tolower(input_data[0]));
972
- unsigned char a = UnsafeNumericCast<uint8_t>(std::tolower(input_data[1]));
973
- unsigned char l = UnsafeNumericCast<uint8_t>(std::tolower(input_data[2]));
974
- unsigned char s = UnsafeNumericCast<uint8_t>(std::tolower(input_data[3]));
975
- unsigned char e = UnsafeNumericCast<uint8_t>(std::tolower(input_data[4]));
976
- if (f == 'f' && a == 'a' && l == 'l' && s == 's' && e == 'e') {
977
- result = false;
978
- return true;
979
- }
980
- return false;
981
- }
982
- default:
983
- return false;
984
- }
928
+ return TryCastStringBool(input_data, input_size, result, strict);
985
929
  }
986
930
  template <>
987
931
  bool TryCast::Operation(string_t input, int8_t &result, bool strict) {
@@ -2298,7 +2242,9 @@ bool TryCastToDecimal::Operation(uhugeint_t input, hugeint_t &result, CastParame
2298
2242
  template <class SRC, class DST>
2299
2243
  bool DoubleToDecimalCast(SRC input, DST &result, CastParameters &parameters, uint8_t width, uint8_t scale) {
2300
2244
  double value = input * NumericHelper::DOUBLE_POWERS_OF_TEN[scale];
2301
- if (value <= -NumericHelper::DOUBLE_POWERS_OF_TEN[width] || value >= NumericHelper::DOUBLE_POWERS_OF_TEN[width]) {
2245
+ double roundedValue = round(value);
2246
+ if (roundedValue <= -NumericHelper::DOUBLE_POWERS_OF_TEN[width] ||
2247
+ roundedValue >= NumericHelper::DOUBLE_POWERS_OF_TEN[width]) {
2302
2248
  string error = StringUtil::Format("Could not cast value %f to DECIMAL(%d,%d)", value, width, scale);
2303
2249
  HandleCastError::AssignError(error, parameters);
2304
2250
  return false;
@@ -180,7 +180,7 @@ void Bit::BitToBlob(string_t bit, string_t &output_blob) {
180
180
  idx_t size = output_blob.GetSize();
181
181
 
182
182
  output[0] = UnsafeNumericCast<char>(GetFirstByte(bit));
183
- if (size > 2) {
183
+ if (size >= 2) {
184
184
  ++output;
185
185
  // First byte in bitstring contains amount of padded bits,
186
186
  // second byte in bitstring is the padded byte,
@@ -2,6 +2,7 @@
2
2
 
3
3
  #include "duckdb/common/types/column/column_data_collection_segment.hpp"
4
4
  #include "duckdb/storage/buffer/block_handle.hpp"
5
+ #include "duckdb/storage/buffer/buffer_pool.hpp"
5
6
  #include "duckdb/storage/buffer_manager.hpp"
6
7
 
7
8
  namespace duckdb {
@@ -45,6 +46,21 @@ ColumnDataAllocator::ColumnDataAllocator(ColumnDataAllocator &other) {
45
46
  }
46
47
  }
47
48
 
49
+ ColumnDataAllocator::~ColumnDataAllocator() {
50
+ if (type == ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR) {
51
+ return;
52
+ }
53
+ for (auto &block : blocks) {
54
+ block.handle->SetDestroyBufferUpon(DestroyBufferUpon::UNPIN);
55
+ }
56
+ const auto data_size = SizeInBytes();
57
+ blocks.clear();
58
+ if (Allocator::SupportsFlush() &&
59
+ data_size > alloc.buffer_manager->GetBufferPool().GetAllocatorBulkDeallocationFlushThreshold()) {
60
+ Allocator::FlushAll();
61
+ }
62
+ }
63
+
48
64
  BufferHandle ColumnDataAllocator::Pin(uint32_t block_id) {
49
65
  D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR || type == ColumnDataAllocatorType::HYBRID);
50
66
  shared_ptr<BlockHandle> handle;
@@ -65,7 +81,8 @@ BufferHandle ColumnDataAllocator::AllocateBlock(idx_t size) {
65
81
  BlockMetaData data;
66
82
  data.size = 0;
67
83
  data.capacity = NumericCast<uint32_t>(max_size);
68
- auto pin = alloc.buffer_manager->Allocate(MemoryTag::COLUMN_DATA, max_size, false, &data.handle);
84
+ auto pin = alloc.buffer_manager->Allocate(MemoryTag::COLUMN_DATA, max_size, false);
85
+ data.handle = pin.GetBlockHandle();
69
86
  blocks.push_back(std::move(data));
70
87
  allocated_size += max_size;
71
88
  return pin;
@@ -11,7 +11,8 @@ namespace duckdb {
11
11
  using ValidityBytes = TupleDataLayout::ValidityBytes;
12
12
 
13
13
  TupleDataBlock::TupleDataBlock(BufferManager &buffer_manager, idx_t capacity_p) : capacity(capacity_p), size(0) {
14
- buffer_manager.Allocate(MemoryTag::HASH_TABLE, capacity, false, &handle);
14
+ auto buffer_handle = buffer_manager.Allocate(MemoryTag::HASH_TABLE, capacity, false);
15
+ handle = buffer_handle.GetBlockHandle();
15
16
  }
16
17
 
17
18
  TupleDataBlock::TupleDataBlock(TupleDataBlock &&other) noexcept : capacity(0), size(0) {
@@ -1,6 +1,7 @@
1
1
  #include "duckdb/common/types/row/tuple_data_segment.hpp"
2
2
 
3
3
  #include "duckdb/common/types/row/tuple_data_allocator.hpp"
4
+ #include "duckdb/storage/buffer/buffer_pool.hpp"
4
5
 
5
6
  namespace duckdb {
6
7
 
@@ -118,6 +119,10 @@ TupleDataSegment::~TupleDataSegment() {
118
119
  }
119
120
  pinned_row_handles.clear();
120
121
  pinned_heap_handles.clear();
122
+ if (Allocator::SupportsFlush() && allocator &&
123
+ data_size > allocator->GetBufferManager().GetBufferPool().GetAllocatorBulkDeallocationFlushThreshold()) {
124
+ Allocator::FlushAll();
125
+ }
121
126
  allocator.reset();
122
127
  }
123
128
 
@@ -192,7 +192,7 @@ struct GenericArgMinMaxState {
192
192
 
193
193
  static void PrepareData(Vector &by, idx_t count, Vector &extra_state, UnifiedVectorFormat &result) {
194
194
  OrderModifiers modifiers(ORDER_TYPE, OrderByNullType::NULLS_LAST);
195
- CreateSortKeyHelpers::CreateSortKey(by, count, modifiers, extra_state);
195
+ CreateSortKeyHelpers::CreateSortKeyWithValidity(by, extra_state, modifiers, count);
196
196
  extra_state.ToUnifiedFormat(count, result);
197
197
  }
198
198
  };
@@ -4,6 +4,7 @@
4
4
  #include "duckdb/common/vector_operations/vector_operations.hpp"
5
5
  #include "duckdb/common/operator/comparison_operators.hpp"
6
6
  #include "duckdb/common/types/null_value.hpp"
7
+ #include "duckdb/main/config.hpp"
7
8
  #include "duckdb/planner/expression.hpp"
8
9
  #include "duckdb/planner/expression/bound_comparison_expression.hpp"
9
10
  #include "duckdb/planner/expression_binder.hpp"
@@ -330,7 +331,7 @@ unique_ptr<FunctionData> BindMinMax(ClientContext &context, AggregateFunction &f
330
331
  vector<unique_ptr<Expression>> &arguments) {
331
332
  if (arguments[0]->return_type.id() == LogicalTypeId::VARCHAR) {
332
333
  auto str_collation = StringType::GetCollation(arguments[0]->return_type);
333
- if (!str_collation.empty()) {
334
+ if (!str_collation.empty() || !DBConfig::GetConfig(context).options.collation.empty()) {
334
335
  // If aggr function is min/max and uses collations, replace bound_function with arg_min/arg_max
335
336
  // to make sure the result's correctness.
336
337
  string function_name = function.name == "min" ? "arg_min" : "arg_max";
@@ -251,11 +251,7 @@ bool Iterator::Next() {
251
251
  }
252
252
 
253
253
  void Iterator::PopNode() {
254
- // We are popping a gate node.
255
- if (nodes.top().node.GetGateStatus() == GateStatus::GATE_SET) {
256
- D_ASSERT(status == GateStatus::GATE_SET);
257
- status = GateStatus::GATE_NOT_SET;
258
- }
254
+ auto gate_status = nodes.top().node.GetGateStatus();
259
255
 
260
256
  // Pop the byte and the node.
261
257
  if (nodes.top().node.GetType() != NType::PREFIX) {
@@ -264,19 +260,25 @@ void Iterator::PopNode() {
264
260
  nested_depth--;
265
261
  D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE);
266
262
  }
267
- nodes.pop();
268
- return;
269
- }
270
263
 
271
- // Pop all prefix bytes and the node.
272
- Prefix prefix(art, nodes.top().node);
273
- auto prefix_byte_count = prefix.data[Prefix::Count(art)];
274
- current_key.Pop(prefix_byte_count);
275
- if (status == GateStatus::GATE_SET) {
276
- nested_depth -= prefix_byte_count;
277
- D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE);
264
+ } else {
265
+ // Pop all prefix bytes and the node.
266
+ Prefix prefix(art, nodes.top().node);
267
+ auto prefix_byte_count = prefix.data[Prefix::Count(art)];
268
+ current_key.Pop(prefix_byte_count);
269
+
270
+ if (status == GateStatus::GATE_SET) {
271
+ nested_depth -= prefix_byte_count;
272
+ D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE);
273
+ }
278
274
  }
279
275
  nodes.pop();
276
+
277
+ // We are popping a gate node.
278
+ if (gate_status == GateStatus::GATE_SET) {
279
+ D_ASSERT(status == GateStatus::GATE_SET);
280
+ status = GateStatus::GATE_NOT_SET;
281
+ }
280
282
  }
281
283
 
282
284
  } // namespace duckdb
@@ -400,42 +400,15 @@ void Prefix::TransformToDeprecated(ART &art, Node &node, unsafe_unique_ptr<Fixed
400
400
  return Node::TransformToDeprecated(art, ref, allocator);
401
401
  }
402
402
 
403
- // Fast path.
404
- if (art.prefix_count <= DEPRECATED_COUNT) {
405
- reference<Node> ref(node);
406
- while (ref.get().GetType() == PREFIX && ref.get().GetGateStatus() == GateStatus::GATE_NOT_SET) {
407
- Prefix prefix(art, ref, true, true);
408
- if (!prefix.in_memory) {
409
- return;
410
- }
411
-
412
- Node new_node;
413
- new_node = allocator->New();
414
- new_node.SetMetadata(static_cast<uint8_t>(PREFIX));
415
-
416
- Prefix new_prefix(allocator, new_node, DEPRECATED_COUNT);
417
- new_prefix.data[DEPRECATED_COUNT] = prefix.data[Count(art)];
418
- memcpy(new_prefix.data, prefix.data, new_prefix.data[DEPRECATED_COUNT]);
419
- *new_prefix.ptr = *prefix.ptr;
420
-
421
- prefix.ptr->Clear();
422
- Node::Free(art, ref);
423
- ref.get() = new_node;
424
- ref = *new_prefix.ptr;
425
- }
426
-
427
- return Node::TransformToDeprecated(art, ref, allocator);
428
- }
429
-
430
- // Else, we need to create a new prefix chain.
403
+ // We need to create a new prefix (chain).
431
404
  Node new_node;
432
405
  new_node = allocator->New();
433
406
  new_node.SetMetadata(static_cast<uint8_t>(PREFIX));
434
407
  Prefix new_prefix(allocator, new_node, DEPRECATED_COUNT);
435
408
 
436
- reference<Node> ref(node);
437
- while (ref.get().GetType() == PREFIX && ref.get().GetGateStatus() == GateStatus::GATE_NOT_SET) {
438
- Prefix prefix(art, ref, true, true);
409
+ Node current_node = node;
410
+ while (current_node.GetType() == PREFIX && current_node.GetGateStatus() == GateStatus::GATE_NOT_SET) {
411
+ Prefix prefix(art, current_node, true, true);
439
412
  if (!prefix.in_memory) {
440
413
  return;
441
414
  }
@@ -445,11 +418,13 @@ void Prefix::TransformToDeprecated(ART &art, Node &node, unsafe_unique_ptr<Fixed
445
418
  }
446
419
 
447
420
  *new_prefix.ptr = *prefix.ptr;
448
- Node::GetAllocator(art, PREFIX).Free(ref);
449
- ref = *new_prefix.ptr;
421
+ prefix.ptr->Clear();
422
+ Node::Free(art, current_node);
423
+ current_node = *new_prefix.ptr;
450
424
  }
451
425
 
452
- return Node::TransformToDeprecated(art, ref, allocator);
426
+ node = new_node;
427
+ return Node::TransformToDeprecated(art, *new_prefix.ptr, allocator);
453
428
  }
454
429
 
455
430
  Prefix Prefix::Append(ART &art, const uint8_t byte) {
@@ -40,7 +40,8 @@ FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager)
40
40
  block_handle(nullptr) {
41
41
 
42
42
  auto &buffer_manager = block_manager.buffer_manager;
43
- buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false, &block_handle);
43
+ buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false);
44
+ block_handle = buffer_handle.GetBlockHandle();
44
45
  }
45
46
 
46
47
  FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const idx_t allocation_size,
@@ -137,8 +138,8 @@ void FixedSizeBuffer::Pin() {
137
138
 
138
139
  // Copy the (partial) data into a new (not yet disk-backed) buffer handle.
139
140
  shared_ptr<BlockHandle> new_block_handle;
140
- auto new_buffer_handle =
141
- buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false, &new_block_handle);
141
+ auto new_buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false);
142
+ new_block_handle = new_buffer_handle.GetBlockHandle();
142
143
  memcpy(new_buffer_handle.Ptr(), buffer_handle.Ptr() + block_pointer.offset, allocation_size);
143
144
 
144
145
  buffer_handle = std::move(new_buffer_handle);
@@ -342,6 +342,7 @@ void LocalUngroupedAggregateState::Sink(DataChunk &payload_chunk, idx_t payload_
342
342
  #endif
343
343
  auto &aggregate = state.aggregate_expressions[aggr_idx]->Cast<BoundAggregateExpression>();
344
344
  idx_t payload_cnt = aggregate.children.size();
345
+ D_ASSERT(payload_idx + payload_cnt <= payload_chunk.data.size());
345
346
  auto start_of_input = payload_cnt == 0 ? nullptr : &payload_chunk.data[payload_idx];
346
347
  AggregateInputData aggr_input_data(state.bind_data[aggr_idx], allocator);
347
348
  aggregate.function.simple_update(start_of_input, aggr_input_data, payload_cnt, state.aggregate_data[aggr_idx].get(),
@@ -54,7 +54,8 @@ void CSVBuffer::AllocateBuffer(idx_t buffer_size) {
54
54
  auto &buffer_manager = BufferManager::GetBufferManager(context);
55
55
  bool can_destroy = !is_pipe;
56
56
  handle = buffer_manager.Allocate(MemoryTag::CSV_READER, MaxValue<idx_t>(buffer_manager.GetBlockSize(), buffer_size),
57
- can_destroy, &block);
57
+ can_destroy);
58
+ block = handle.GetBlockHandle();
58
59
  }
59
60
 
60
61
  idx_t CSVBuffer::GetBufferSize() {
@@ -25,7 +25,7 @@ BaseScanner::BaseScanner(shared_ptr<CSVBufferManager> buffer_manager_p, shared_p
25
25
  }
26
26
  }
27
27
 
28
- bool BaseScanner::FinishedFile() {
28
+ bool BaseScanner::FinishedFile() const {
29
29
  if (!cur_buffer_handle) {
30
30
  return true;
31
31
  }
@@ -76,7 +76,7 @@ void BaseScanner::FinalizeChunkProcess() {
76
76
  throw InternalException("FinalizeChunkProcess() from CSV Base Scanner is not implemented");
77
77
  }
78
78
 
79
- CSVStateMachine &BaseScanner::GetStateMachine() {
79
+ CSVStateMachine &BaseScanner::GetStateMachine() const {
80
80
  return *state_machine;
81
81
  }
82
82
 
@@ -12,10 +12,32 @@ void ColumnCountResult::AddValue(ColumnCountResult &result, idx_t buffer_pos) {
12
12
  }
13
13
 
14
14
  inline void ColumnCountResult::InternalAddRow() {
15
- column_counts[result_position].number_of_columns = current_column_count + 1;
15
+ const idx_t column_count = current_column_count + 1;
16
+ column_counts[result_position].number_of_columns = column_count;
17
+ rows_per_column_count[column_count]++;
16
18
  current_column_count = 0;
17
19
  }
18
20
 
21
+ idx_t ColumnCountResult::GetMostFrequentColumnCount() const {
22
+ if (rows_per_column_count.empty()) {
23
+ return 1;
24
+ }
25
+ idx_t column_count = 0;
26
+ idx_t current_max = 0;
27
+ for (auto &rpc : rows_per_column_count) {
28
+ if (rpc.second > current_max) {
29
+ current_max = rpc.second;
30
+ column_count = rpc.first;
31
+ } else if (rpc.second == current_max) {
32
+ // We pick the largest to untie
33
+ if (rpc.first > column_count) {
34
+ column_count = rpc.first;
35
+ }
36
+ }
37
+ }
38
+ return column_count;
39
+ }
40
+
19
41
  bool ColumnCountResult::AddRow(ColumnCountResult &result, idx_t buffer_pos) {
20
42
  result.InternalAddRow();
21
43
  if (!result.states.EmptyLastValue()) {
@@ -28,6 +28,11 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m
28
28
  current_errors(state_machine.options.IgnoreErrors()), sniffing(sniffing_p), path(std::move(path_p)) {
29
29
  // Vector information
30
30
  D_ASSERT(number_of_columns > 0);
31
+ if (!buffer_handle) {
32
+ // It Was Over Before It Even Began
33
+ D_ASSERT(iterator.done);
34
+ return;
35
+ }
31
36
  buffer_handles[buffer_handle->buffer_idx] = buffer_handle;
32
37
  // Buffer Information
33
38
  buffer_ptr = buffer_handle->Ptr();
@@ -264,6 +269,10 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
264
269
  }
265
270
  bool success = true;
266
271
  switch (parse_types[chunk_col_id].type_id) {
272
+ case LogicalTypeId::BOOLEAN:
273
+ success =
274
+ TryCastStringBool(value_ptr, size, static_cast<bool *>(vector_ptr[chunk_col_id])[number_of_rows], false);
275
+ break;
267
276
  case LogicalTypeId::TINYINT:
268
277
  success = TrySimpleIntegerCast(value_ptr, size, static_cast<int8_t *>(vector_ptr[chunk_col_id])[number_of_rows],
269
278
  false);
@@ -644,9 +653,15 @@ bool LineError::HandleErrors(StringValueResult &result) {
644
653
  result.error_handler.Error(csv_error);
645
654
  }
646
655
  if (is_error_in_line) {
647
- result.borked_rows.insert(result.number_of_rows);
648
- result.cur_col_id = 0;
649
- result.chunk_col_id = 0;
656
+ if (result.sniffing) {
657
+ // If we are sniffing we just remove the line
658
+ result.RemoveLastLine();
659
+ } else {
660
+ // Otherwise, we add it to the borked rows to remove it later and just cleanup the column variables.
661
+ result.borked_rows.insert(result.number_of_rows);
662
+ result.cur_col_id = 0;
663
+ result.chunk_col_id = 0;
664
+ }
650
665
  Reset();
651
666
  return true;
652
667
  }
@@ -1437,6 +1452,7 @@ bool StringValueScanner::CanDirectlyCast(const LogicalType &type, bool icu_loade
1437
1452
  case LogicalTypeId::TIME:
1438
1453
  case LogicalTypeId::DECIMAL:
1439
1454
  case LogicalType::VARCHAR:
1455
+ case LogicalType::BOOLEAN:
1440
1456
  return true;
1441
1457
  case LogicalType::TIMESTAMP_TZ:
1442
1458
  // We only try to do direct cast of timestamp tz if the ICU extension is not loaded, otherwise, it needs to go
@@ -1493,7 +1509,7 @@ void StringValueScanner::SetStart() {
1493
1509
  }
1494
1510
  if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size ||
1495
1511
  scan_finder->iterator.GetBufferIdx() > iterator.GetBufferIdx()) {
1496
- // If things go terribly wrong, we never loop indefinetly.
1512
+ // If things go terribly wrong, we never loop indefinitely.
1497
1513
  iterator.pos.buffer_idx = scan_finder->iterator.pos.buffer_idx;
1498
1514
  iterator.pos.buffer_pos = scan_finder->iterator.pos.buffer_pos;
1499
1515
  result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size};
@@ -1521,8 +1537,11 @@ void StringValueScanner::FinalizeChunkProcess() {
1521
1537
  // If we are not done we have two options.
1522
1538
  // 1) If a boundary is set.
1523
1539
  if (iterator.IsBoundarySet()) {
1540
+ bool has_unterminated_quotes = false;
1524
1541
  if (!result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
1525
1542
  iterator.done = true;
1543
+ } else {
1544
+ has_unterminated_quotes = true;
1526
1545
  }
1527
1546
  // We read until the next line or until we have nothing else to read.
1528
1547
  // Move to next buffer
@@ -1540,6 +1559,16 @@ void StringValueScanner::FinalizeChunkProcess() {
1540
1559
  MoveToNextBuffer();
1541
1560
  }
1542
1561
  } else {
1562
+ if (result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
1563
+ has_unterminated_quotes = true;
1564
+ }
1565
+ result.current_errors.HandleErrors(result);
1566
+ }
1567
+ if (states.IsQuotedCurrent() && !has_unterminated_quotes) {
1568
+ // If we finish the execution of a buffer, and we end in a quoted state, it means we have unterminated
1569
+ // quotes
1570
+ result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id,
1571
+ result.last_position);
1543
1572
  result.current_errors.HandleErrors(result);
1544
1573
  }
1545
1574
  if (!iterator.done) {
@@ -134,7 +134,11 @@ SnifferResult CSVSniffer::MinimalSniff() {
134
134
  for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) {
135
135
  auto &cur_vector = data_chunk.data[col_idx];
136
136
  auto vector_data = FlatVector::GetData<string_t>(cur_vector);
137
- HeaderValue val(vector_data[0]);
137
+ auto &validity = FlatVector::Validity(cur_vector);
138
+ HeaderValue val;
139
+ if (validity.RowIsValid(0)) {
140
+ val = HeaderValue(vector_data[0]);
141
+ }
138
142
  potential_header.emplace_back(val);
139
143
  }
140
144
  }
@@ -221,13 +225,16 @@ SnifferResult CSVSniffer::SniffCSV(bool force_match) {
221
225
  // If the header exists it should match
222
226
  string header_error = "The Column names set by the user do not match the ones found by the sniffer. \n";
223
227
  auto &set_names = *set_columns.names;
224
- for (idx_t i = 0; i < set_columns.Size(); i++) {
225
- if (set_names[i] != names[i]) {
226
- header_error += "Column at position: " + to_string(i) + " Set name: " + set_names[i] +
227
- " Sniffed Name: " + names[i] + "\n";
228
- match = false;
228
+ if (set_names.size() == names.size()) {
229
+ for (idx_t i = 0; i < set_columns.Size(); i++) {
230
+ if (set_names[i] != names[i]) {
231
+ header_error += "Column at position: " + to_string(i) + " Set name: " + set_names[i] +
232
+ " Sniffed Name: " + names[i] + "\n";
233
+ match = false;
234
+ }
229
235
  }
230
236
  }
237
+
231
238
  if (!match) {
232
239
  error += header_error;
233
240
  }
@@ -235,15 +242,18 @@ SnifferResult CSVSniffer::SniffCSV(bool force_match) {
235
242
  match = true;
236
243
  string type_error = "The Column types set by the user do not match the ones found by the sniffer. \n";
237
244
  auto &set_types = *set_columns.types;
238
- for (idx_t i = 0; i < set_columns.Size(); i++) {
239
- if (set_types[i] != detected_types[i]) {
240
- type_error += "Column at position: " + to_string(i) + " Set type: " + set_types[i].ToString() +
241
- " Sniffed type: " + detected_types[i].ToString() + "\n";
242
- detected_types[i] = set_types[i];
243
- manually_set[i] = true;
244
- match = false;
245
+ if (detected_types.size() == set_columns.Size()) {
246
+ for (idx_t i = 0; i < set_columns.Size(); i++) {
247
+ if (set_types[i] != detected_types[i]) {
248
+ type_error += "Column at position: " + to_string(i) + " Set type: " + set_types[i].ToString() +
249
+ " Sniffed type: " + detected_types[i].ToString() + "\n";
250
+ detected_types[i] = set_types[i];
251
+ manually_set[i] = true;
252
+ match = false;
253
+ }
245
254
  }
246
255
  }
256
+
247
257
  if (!match) {
248
258
  error += type_error;
249
259
  }