duckdb 1.3.1-dev6.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/aggregate/distributive/arg_min_max.cpp +27 -39
  3. package/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp +2 -3
  4. package/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_sort_tree.hpp +1 -1
  5. package/src/duckdb/extension/core_functions/lambda_functions.cpp +16 -14
  6. package/src/duckdb/extension/core_functions/scalar/list/list_filter.cpp +3 -2
  7. package/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp +46 -10
  8. package/src/duckdb/extension/core_functions/scalar/list/list_transform.cpp +3 -2
  9. package/src/duckdb/extension/core_functions/scalar/random/random.cpp +3 -1
  10. package/src/duckdb/extension/icu/icu-datefunc.cpp +5 -3
  11. package/src/duckdb/extension/icu/icu-strptime.cpp +6 -1
  12. package/src/duckdb/extension/icu/icu-timezone.cpp +4 -0
  13. package/src/duckdb/extension/icu/icu_extension.cpp +7 -2
  14. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +1 -1
  15. package/src/duckdb/extension/icu/include/icu-helpers.hpp +1 -1
  16. package/src/duckdb/extension/icu/third_party/icu/common/uloc.cpp +5 -5
  17. package/src/duckdb/extension/json/include/json_common.hpp +19 -0
  18. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -4
  19. package/src/duckdb/extension/json/include/json_functions.hpp +4 -4
  20. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +38 -17
  21. package/src/duckdb/extension/json/json_functions/json_table_in_out.cpp +11 -7
  22. package/src/duckdb/extension/json/json_functions.cpp +4 -4
  23. package/src/duckdb/extension/json/json_reader.cpp +1 -1
  24. package/src/duckdb/extension/parquet/column_reader.cpp +7 -1
  25. package/src/duckdb/extension/parquet/include/parquet_bss_decoder.hpp +2 -2
  26. package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +2 -2
  27. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +2 -1
  28. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +1 -1
  29. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +3 -0
  30. package/src/duckdb/extension/parquet/include/writer/parquet_write_operators.hpp +3 -1
  31. package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +1 -1
  32. package/src/duckdb/extension/parquet/parquet_crypto.cpp +9 -5
  33. package/src/duckdb/extension/parquet/parquet_extension.cpp +26 -0
  34. package/src/duckdb/extension/parquet/parquet_float16.cpp +4 -2
  35. package/src/duckdb/extension/parquet/parquet_metadata.cpp +3 -3
  36. package/src/duckdb/extension/parquet/parquet_multi_file_info.cpp +12 -0
  37. package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -4
  38. package/src/duckdb/extension/parquet/parquet_statistics.cpp +13 -3
  39. package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -1
  40. package/src/duckdb/extension/parquet/reader/decimal_column_reader.cpp +1 -1
  41. package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
  42. package/src/duckdb/extension/parquet/reader/struct_column_reader.cpp +13 -4
  43. package/src/duckdb/extension/parquet/serialize_parquet.cpp +2 -0
  44. package/src/duckdb/src/catalog/catalog.cpp +10 -4
  45. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +4 -10
  46. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -2
  47. package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +2 -2
  49. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +1 -1
  50. package/src/duckdb/src/catalog/catalog_search_path.cpp +7 -1
  51. package/src/duckdb/src/catalog/catalog_set.cpp +21 -1
  52. package/src/duckdb/src/common/adbc/adbc.cpp +1 -1
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +17 -5
  54. package/src/duckdb/src/common/arrow/arrow_converter.cpp +23 -15
  55. package/src/duckdb/src/common/box_renderer.cpp +1 -2
  56. package/src/duckdb/src/common/enum_util.cpp +4 -3
  57. package/src/duckdb/src/common/local_file_system.cpp +13 -12
  58. package/src/duckdb/src/common/multi_file/multi_file_column_mapper.cpp +35 -12
  59. package/src/duckdb/src/common/multi_file/multi_file_reader.cpp +13 -3
  60. package/src/duckdb/src/common/string_util.cpp +7 -5
  61. package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +4 -4
  62. package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +4 -4
  63. package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +4 -4
  64. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +4 -4
  65. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +1 -1
  66. package/src/duckdb/src/common/types/uuid.cpp +5 -1
  67. package/src/duckdb/src/common/types.cpp +28 -0
  68. package/src/duckdb/src/common/virtual_file_system.cpp +5 -0
  69. package/src/duckdb/src/execution/column_binding_resolver.cpp +49 -30
  70. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +4 -0
  71. package/src/duckdb/src/execution/join_hashtable.cpp +10 -7
  72. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
  73. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +1 -1
  74. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +2 -1
  75. package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +1 -4
  76. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +53 -1
  77. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +58 -59
  78. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +10 -5
  79. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +4 -0
  80. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +18 -8
  81. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +1 -1
  82. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +1 -0
  83. package/src/duckdb/src/execution/physical_plan_generator.cpp +5 -5
  84. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +2 -1
  85. package/src/duckdb/src/function/function.cpp +4 -0
  86. package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +6 -0
  87. package/src/duckdb/src/function/scalar/struct/remap_struct.cpp +10 -1
  88. package/src/duckdb/src/function/table/copy_csv.cpp +1 -0
  89. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  90. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
  91. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
  93. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +2 -0
  94. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -2
  95. package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -9
  96. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_column_mapper.hpp +3 -5
  98. package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_reader.hpp +7 -0
  99. package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp +3 -0
  100. package/src/duckdb/src/include/duckdb/common/shadow_forbidden_functions.hpp +40 -0
  101. package/src/duckdb/src/include/duckdb/common/string.hpp +25 -2
  102. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +20 -24
  103. package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +20 -24
  104. package/src/duckdb/src/include/duckdb/common/types.hpp +3 -0
  105. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +34 -8
  106. package/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp +1 -0
  107. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +3 -2
  108. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +3 -0
  109. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
  110. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +15 -3
  111. package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +2 -2
  112. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +7 -3
  113. package/src/duckdb/src/include/duckdb/function/function.hpp +1 -0
  114. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
  115. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +20 -12
  116. package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +4 -3
  117. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +3 -1
  118. package/src/duckdb/src/include/duckdb/logging/log_type.hpp +17 -0
  119. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -0
  120. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +22 -6
  121. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  122. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -1
  123. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +27 -13
  124. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +1 -0
  125. package/src/duckdb/src/include/duckdb/main/settings.hpp +11 -0
  126. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +7 -1
  127. package/src/duckdb/src/include/duckdb/original/std/locale.hpp +10 -0
  128. package/src/duckdb/src/include/duckdb/original/std/memory.hpp +12 -0
  129. package/src/duckdb/src/include/duckdb/original/std/sstream.hpp +11 -0
  130. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +5 -3
  131. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +4 -2
  132. package/src/duckdb/src/logging/log_manager.cpp +1 -0
  133. package/src/duckdb/src/logging/log_types.cpp +40 -0
  134. package/src/duckdb/src/main/attached_database.cpp +4 -0
  135. package/src/duckdb/src/main/client_context.cpp +1 -0
  136. package/src/duckdb/src/main/config.cpp +1 -0
  137. package/src/duckdb/src/main/database.cpp +1 -0
  138. package/src/duckdb/src/main/database_manager.cpp +19 -2
  139. package/src/duckdb/src/main/extension/extension_helper.cpp +4 -3
  140. package/src/duckdb/src/main/query_profiler.cpp +2 -2
  141. package/src/duckdb/src/main/query_result.cpp +1 -1
  142. package/src/duckdb/src/main/secret/secret_manager.cpp +2 -0
  143. package/src/duckdb/src/main/settings/autogenerated_settings.cpp +7 -0
  144. package/src/duckdb/src/main/settings/custom_settings.cpp +106 -34
  145. package/src/duckdb/src/optimizer/optimizer.cpp +1 -1
  146. package/src/duckdb/src/optimizer/topn_optimizer.cpp +18 -8
  147. package/src/duckdb/src/parallel/executor.cpp +5 -0
  148. package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +1 -1
  149. package/src/duckdb/src/parser/transform/expression/transform_interval.cpp +5 -1
  150. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +21 -24
  151. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -8
  152. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +3 -2
  153. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +0 -4
  154. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +3 -0
  155. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +3 -0
  156. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +3 -0
  157. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
  158. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +0 -1
  159. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
  160. package/src/duckdb/src/planner/expression_binder.cpp +4 -2
  161. package/src/duckdb/src/planner/logical_operator.cpp +2 -1
  162. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +4 -1
  163. package/src/duckdb/src/storage/buffer/block_handle.cpp +8 -0
  164. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +44 -18
  165. package/src/duckdb/src/storage/caching_file_system.cpp +7 -7
  166. package/src/duckdb/src/storage/standard_buffer_manager.cpp +4 -3
  167. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  168. package/src/duckdb/src/storage/wal_replay.cpp +9 -4
  169. package/src/duckdb/third_party/fmt/include/fmt/format.h +8 -1
  170. package/src/duckdb/third_party/fsst/libfsst.cpp +4 -3
  171. package/src/duckdb/third_party/httplib/httplib.hpp +25 -22
  172. package/src/duckdb/third_party/hyperloglog/sds.cpp +7 -3
  173. package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +8 -1
  174. package/src/duckdb/third_party/re2/re2/filtered_re2.h +8 -2
  175. package/src/duckdb/third_party/re2/re2/pod_array.h +7 -1
  176. package/src/duckdb/third_party/re2/re2/re2.cc +6 -2
  177. package/src/duckdb/third_party/re2/re2/set.cc +1 -1
  178. package/src/duckdb/third_party/re2/re2/set.h +7 -1
  179. package/src/duckdb/ub_src_logging.cpp +4 -4
@@ -15,25 +15,25 @@
15
15
  namespace duckdb {
16
16
 
17
17
  string HTMLTreeRenderer::ToString(const LogicalOperator &op) {
18
- std::stringstream ss;
18
+ duckdb::stringstream ss;
19
19
  Render(op, ss);
20
20
  return ss.str();
21
21
  }
22
22
 
23
23
  string HTMLTreeRenderer::ToString(const PhysicalOperator &op) {
24
- std::stringstream ss;
24
+ duckdb::stringstream ss;
25
25
  Render(op, ss);
26
26
  return ss.str();
27
27
  }
28
28
 
29
29
  string HTMLTreeRenderer::ToString(const ProfilingNode &op) {
30
- std::stringstream ss;
30
+ duckdb::stringstream ss;
31
31
  Render(op, ss);
32
32
  return ss.str();
33
33
  }
34
34
 
35
35
  string HTMLTreeRenderer::ToString(const Pipeline &op) {
36
- std::stringstream ss;
36
+ duckdb::stringstream ss;
37
37
  Render(op, ss);
38
38
  return ss.str();
39
39
  }
@@ -19,25 +19,25 @@ using namespace duckdb_yyjson; // NOLINT
19
19
  namespace duckdb {
20
20
 
21
21
  string JSONTreeRenderer::ToString(const LogicalOperator &op) {
22
- std::stringstream ss;
22
+ duckdb::stringstream ss;
23
23
  Render(op, ss);
24
24
  return ss.str();
25
25
  }
26
26
 
27
27
  string JSONTreeRenderer::ToString(const PhysicalOperator &op) {
28
- std::stringstream ss;
28
+ duckdb::stringstream ss;
29
29
  Render(op, ss);
30
30
  return ss.str();
31
31
  }
32
32
 
33
33
  string JSONTreeRenderer::ToString(const ProfilingNode &op) {
34
- std::stringstream ss;
34
+ duckdb::stringstream ss;
35
35
  Render(op, ss);
36
36
  return ss.str();
37
37
  }
38
38
 
39
39
  string JSONTreeRenderer::ToString(const Pipeline &op) {
40
- std::stringstream ss;
40
+ duckdb::stringstream ss;
41
41
  Render(op, ss);
42
42
  return ss.str();
43
43
  }
@@ -283,25 +283,25 @@ void TextTreeRenderer::RenderBoxContent(RenderTree &root, std::ostream &ss, idx_
283
283
  }
284
284
 
285
285
  string TextTreeRenderer::ToString(const LogicalOperator &op) {
286
- std::stringstream ss;
286
+ duckdb::stringstream ss;
287
287
  Render(op, ss);
288
288
  return ss.str();
289
289
  }
290
290
 
291
291
  string TextTreeRenderer::ToString(const PhysicalOperator &op) {
292
- std::stringstream ss;
292
+ duckdb::stringstream ss;
293
293
  Render(op, ss);
294
294
  return ss.str();
295
295
  }
296
296
 
297
297
  string TextTreeRenderer::ToString(const ProfilingNode &op) {
298
- std::stringstream ss;
298
+ duckdb::stringstream ss;
299
299
  Render(op, ss);
300
300
  return ss.str();
301
301
  }
302
302
 
303
303
  string TextTreeRenderer::ToString(const Pipeline &op) {
304
- std::stringstream ss;
304
+ duckdb::stringstream ss;
305
305
  Render(op, ss);
306
306
  return ss.str();
307
307
  }
@@ -26,7 +26,7 @@ static inline void SwapTupleDataChunk(TupleDataChunk &a, TupleDataChunk &b) noex
26
26
  std::swap(a.lock, b.lock);
27
27
  }
28
28
 
29
- TupleDataChunk::TupleDataChunk(TupleDataChunk &&other) noexcept {
29
+ TupleDataChunk::TupleDataChunk(TupleDataChunk &&other) noexcept : count(0) {
30
30
  SwapTupleDataChunk(*this, other);
31
31
  }
32
32
 
@@ -1,4 +1,5 @@
1
1
  #include "duckdb/common/types/uuid.hpp"
2
+ #include "duckdb/common/chrono.hpp"
2
3
  #include "duckdb/common/random_engine.hpp"
3
4
 
4
5
  namespace duckdb {
@@ -210,7 +211,10 @@ hugeint_t UUIDv7::GenerateRandomUUID(RandomEngine &engine) {
210
211
  // Fill in variant field.
211
212
  bytes[8] = (bytes[8] & 0x3f) | 0x80;
212
213
 
213
- return Convert(bytes);
214
+ // Flip the top byte
215
+ auto result = Convert(bytes);
216
+ result.upper ^= NumericLimits<int64_t>::Minimum();
217
+ return result;
214
218
  }
215
219
 
216
220
  hugeint_t UUIDv7::GenerateRandomUUID() {
@@ -648,6 +648,34 @@ bool LogicalType::IsIntegral() const {
648
648
  }
649
649
  }
650
650
 
651
+ bool LogicalType::IsSigned() const {
652
+ switch (id_) {
653
+ case LogicalTypeId::TINYINT:
654
+ case LogicalTypeId::SMALLINT:
655
+ case LogicalTypeId::INTEGER:
656
+ case LogicalTypeId::BIGINT:
657
+ case LogicalTypeId::HUGEINT:
658
+ return true;
659
+ default:
660
+ break;
661
+ }
662
+ return false;
663
+ }
664
+
665
+ bool LogicalType::IsUnsigned() const {
666
+ switch (id_) {
667
+ case LogicalTypeId::UTINYINT:
668
+ case LogicalTypeId::USMALLINT:
669
+ case LogicalTypeId::UINTEGER:
670
+ case LogicalTypeId::UBIGINT:
671
+ case LogicalTypeId::UHUGEINT:
672
+ return true;
673
+ default:
674
+ break;
675
+ }
676
+ return false;
677
+ }
678
+
651
679
  bool LogicalType::IsFloating() const {
652
680
  switch (id_) {
653
681
  case LogicalTypeId::FLOAT:
@@ -41,6 +41,11 @@ unique_ptr<FileHandle> VirtualFileSystem::OpenFileExtended(const OpenFileInfo &f
41
41
  } else if (compression != FileCompressionType::UNCOMPRESSED) {
42
42
  auto entry = compressed_fs.find(compression);
43
43
  if (entry == compressed_fs.end()) {
44
+ if (compression == FileCompressionType::ZSTD) {
45
+ throw NotImplementedException(
46
+ "Attempting to open a compressed file, but the compression type is not supported.\nConsider "
47
+ "explicitly \"INSTALL parquet; LOAD parquet;\" to support this compression scheme");
48
+ }
44
49
  throw NotImplementedException(
45
50
  "Attempting to open a compressed file, but the compression type is not supported");
46
51
  }
@@ -26,13 +26,6 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
26
26
  for (auto &cond : comp_join.conditions) {
27
27
  VisitExpression(&cond.left);
28
28
  }
29
- // resolve any single-side predicates
30
- // for now, only ASOF supports this, and we are guaranteed that all right side predicates
31
- // have been pushed into a filter.
32
- if (comp_join.predicate) {
33
- D_ASSERT(op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN);
34
- VisitExpression(&comp_join.predicate);
35
- }
36
29
  // visit the duplicate eliminated columns on the LHS, if any
37
30
  for (auto &expr : comp_join.duplicate_eliminated_columns) {
38
31
  VisitExpression(&expr);
@@ -44,40 +37,38 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
44
37
  }
45
38
  // finally update the bindings with the result bindings of the join
46
39
  bindings = op.GetColumnBindings();
40
+ types = op.types;
41
+ // resolve any mixed predicates
42
+ // for now, only ASOF supports this.
43
+ if (comp_join.predicate) {
44
+ D_ASSERT(op.type == LogicalOperatorType::LOGICAL_ASOF_JOIN);
45
+ VisitExpression(&comp_join.predicate);
46
+ }
47
47
  return;
48
48
  }
49
49
  case LogicalOperatorType::LOGICAL_DELIM_JOIN: {
50
50
  auto &comp_join = op.Cast<LogicalComparisonJoin>();
51
- // depending on whether the delim join has been flipped, get the appropriate bindings
52
- if (comp_join.delim_flipped) {
53
- VisitOperator(*comp_join.children[1]);
54
- for (auto &cond : comp_join.conditions) {
55
- VisitExpression(&cond.right);
56
- }
57
- } else {
58
- VisitOperator(*comp_join.children[0]);
59
- for (auto &cond : comp_join.conditions) {
60
- VisitExpression(&cond.left);
61
- }
51
+ // get bindings from the duplicate-eliminated side
52
+ auto &delim_side = comp_join.delim_flipped ? *comp_join.children[1] : *comp_join.children[0];
53
+ VisitOperator(delim_side);
54
+ for (auto &cond : comp_join.conditions) {
55
+ auto &expr = comp_join.delim_flipped ? cond.right : cond.left;
56
+ VisitExpression(&expr);
62
57
  }
63
58
  // visit the duplicate eliminated columns
64
59
  for (auto &expr : comp_join.duplicate_eliminated_columns) {
65
60
  VisitExpression(&expr);
66
61
  }
67
- // now get the other side
68
- if (comp_join.delim_flipped) {
69
- VisitOperator(*comp_join.children[0]);
70
- for (auto &cond : comp_join.conditions) {
71
- VisitExpression(&cond.left);
72
- }
73
- } else {
74
- VisitOperator(*comp_join.children[1]);
75
- for (auto &cond : comp_join.conditions) {
76
- VisitExpression(&cond.right);
77
- }
62
+ // now the other side
63
+ auto &other_side = comp_join.delim_flipped ? *comp_join.children[0] : *comp_join.children[1];
64
+ VisitOperator(other_side);
65
+ for (auto &cond : comp_join.conditions) {
66
+ auto &expr = comp_join.delim_flipped ? cond.left : cond.right;
67
+ VisitExpression(&expr);
78
68
  }
79
69
  // finally update the bindings with the result bindings of the join
80
70
  bindings = op.GetColumnBindings();
71
+ types = op.types;
81
72
  return;
82
73
  }
83
74
  case LogicalOperatorType::LOGICAL_ANY_JOIN: {
@@ -86,10 +77,13 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
86
77
  // this operator
87
78
  VisitOperatorChildren(op);
88
79
  bindings = op.GetColumnBindings();
80
+ types = op.types;
89
81
  auto &any_join = op.Cast<LogicalAnyJoin>();
90
82
  if (any_join.join_type == JoinType::SEMI || any_join.join_type == JoinType::ANTI) {
91
83
  auto right_bindings = op.children[1]->GetColumnBindings();
92
84
  bindings.insert(bindings.end(), right_bindings.begin(), right_bindings.end());
85
+ auto &right_types = op.children[1]->types;
86
+ types.insert(types.end(), right_types.begin(), right_types.end());
93
87
  }
94
88
  if (any_join.join_type == JoinType::RIGHT_SEMI || any_join.join_type == JoinType::RIGHT_ANTI) {
95
89
  throw InternalException("RIGHT SEMI/ANTI any join not supported yet");
@@ -102,12 +96,15 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
102
96
  // afterwards bind the expressions of the CREATE INDEX statement
103
97
  auto &create_index = op.Cast<LogicalCreateIndex>();
104
98
  bindings = LogicalOperator::GenerateColumnBindings(0, create_index.table.GetColumns().LogicalColumnCount());
99
+ // TODO: fill types in too (clearing skips type checks)
100
+ types.clear();
105
101
  VisitOperatorExpressions(op);
106
102
  return;
107
103
  }
108
104
  case LogicalOperatorType::LOGICAL_GET: {
109
105
  //! We first need to update the current set of bindings and then visit operator expressions
110
106
  bindings = op.GetColumnBindings();
107
+ types = op.types;
111
108
  VisitOperatorExpressions(op);
112
109
  return;
113
110
  }
@@ -123,6 +120,8 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
123
120
  // Now insert our dummy bindings at the start of the bindings,
124
121
  // so the first 'column_count' indices of the chunk are reserved for our 'excluded' columns
125
122
  bindings.insert(bindings.begin(), dummy_bindings.begin(), dummy_bindings.end());
123
+ // TODO: fill types in too (clearing skips type checks)
124
+ types.clear();
126
125
  if (insert_op.on_conflict_condition) {
127
126
  VisitExpression(&insert_op.on_conflict_condition);
128
127
  }
@@ -131,20 +130,25 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
131
130
  }
132
131
  VisitOperatorExpressions(op);
133
132
  bindings = op.GetColumnBindings();
133
+ types = op.types;
134
134
  return;
135
135
  }
136
136
  break;
137
137
  }
138
138
  case LogicalOperatorType::LOGICAL_EXTENSION_OPERATOR: {
139
139
  auto &ext_op = op.Cast<LogicalExtensionOperator>();
140
+ // Just to be very sure, we clear before and after resolving extension operator column bindings
141
+ // This skips checks, but makes sure we don't break any extension operators with type verification
142
+ types.clear();
140
143
  ext_op.ResolveColumnBindings(*this, bindings);
144
+ types.clear();
141
145
  return;
142
146
  }
143
147
  case LogicalOperatorType::LOGICAL_RECURSIVE_CTE: {
144
148
  auto &rec = op.Cast<LogicalRecursiveCTE>();
145
149
  VisitOperatorChildren(op);
146
150
  bindings = op.GetColumnBindings();
147
-
151
+ types = op.types;
148
152
  for (auto &expr : rec.key_targets) {
149
153
  VisitExpression(&expr);
150
154
  }
@@ -161,6 +165,7 @@ void ColumnBindingResolver::VisitOperator(LogicalOperator &op) {
161
165
  VisitOperatorExpressions(op);
162
166
  // finally update the current set of bindings to the current set of column bindings
163
167
  bindings = op.GetColumnBindings();
168
+ types = op.types;
164
169
  }
165
170
 
166
171
  unique_ptr<Expression> ColumnBindingResolver::VisitReplace(BoundColumnRefExpression &expr,
@@ -169,6 +174,19 @@ unique_ptr<Expression> ColumnBindingResolver::VisitReplace(BoundColumnRefExpress
169
174
  // check the current set of column bindings to see which index corresponds to the column reference
170
175
  for (idx_t i = 0; i < bindings.size(); i++) {
171
176
  if (expr.binding == bindings[i]) {
177
+ if (!types.empty()) {
178
+ if (bindings.size() != types.size()) {
179
+ throw InternalException(
180
+ "Failed to bind column reference \"%s\" [%d.%d]: inequal num bindings/types (%llu != %llu)",
181
+ expr.GetAlias(), expr.binding.table_index, expr.binding.column_index, bindings.size(),
182
+ types.size());
183
+ }
184
+ if (expr.return_type != types[i]) {
185
+ throw InternalException("Failed to bind column reference \"%s\" [%d.%d]: inequal types (%s != %s)",
186
+ expr.GetAlias(), expr.binding.table_index, expr.binding.column_index,
187
+ expr.return_type.ToString(), types[i].ToString());
188
+ }
189
+ }
172
190
  if (verify_only) {
173
191
  // in verification mode
174
192
  return nullptr;
@@ -210,6 +228,7 @@ unordered_set<idx_t> ColumnBindingResolver::VerifyInternal(LogicalOperator &op)
210
228
 
211
229
  void ColumnBindingResolver::Verify(LogicalOperator &op) {
212
230
  #ifdef DEBUG
231
+ op.ResolveOperatorTypes();
213
232
  ColumnBindingResolver resolver(true);
214
233
  resolver.VisitOperator(op);
215
234
  VerifyInternal(op);
@@ -316,6 +316,10 @@ void FixedSizeAllocator::Init(const FixedSizeAllocatorInfo &info) {
316
316
  }
317
317
 
318
318
  auto buffer_block_pointer = info.block_pointers[i];
319
+ if (buffer_block_pointer.block_id >= MAXIMUM_BLOCK) {
320
+ throw SerializationException("invalid block ID in index storage information");
321
+ }
322
+
319
323
  auto segment_count = info.segment_counts[i];
320
324
  auto allocation_size = info.allocation_sizes[i];
321
325
 
@@ -8,6 +8,7 @@
8
8
  #include "duckdb/storage/buffer_manager.hpp"
9
9
 
10
10
  namespace duckdb {
11
+
11
12
  using ValidityBytes = JoinHashTable::ValidityBytes;
12
13
  using ScanStructure = JoinHashTable::ScanStructure;
13
14
  using ProbeSpill = JoinHashTable::ProbeSpill;
@@ -29,9 +30,10 @@ JoinHashTable::InsertState::InsertState(const JoinHashTable &ht)
29
30
  ht.data_collection->InitializeChunkState(chunk_state, ht.equality_predicate_columns);
30
31
  }
31
32
 
32
- JoinHashTable::JoinHashTable(ClientContext &context_p, const vector<JoinCondition> &conditions_p,
33
- vector<LogicalType> btypes, JoinType type_p, const vector<idx_t> &output_columns_p)
34
- : context(context_p), buffer_manager(BufferManager::GetBufferManager(context)), conditions(conditions_p),
33
+ JoinHashTable::JoinHashTable(ClientContext &context_p, const PhysicalOperator &op_p,
34
+ const vector<JoinCondition> &conditions_p, vector<LogicalType> btypes, JoinType type_p,
35
+ const vector<idx_t> &output_columns_p)
36
+ : context(context_p), op(op_p), buffer_manager(BufferManager::GetBufferManager(context)), conditions(conditions_p),
35
37
  build_types(std::move(btypes)), output_columns(output_columns_p), entry_size(0), tuple_size(0),
36
38
  vfound(Value::BOOLEAN(false)), join_type(type_p), finalized(false), has_null(false),
37
39
  radix_bits(INITIAL_RADIX_BITS) {
@@ -258,7 +260,6 @@ static void GetRowPointersInternal(DataChunk &keys, TupleDataChunkState &key_sta
258
260
 
259
261
  // densify hashes: If there is no sel, flatten the hashes, else densify via UnifiedVectorFormat
260
262
  if (has_row_sel) {
261
-
262
263
  hashes_v.ToUnifiedFormat(count, hashes_unified_v);
263
264
  uses_unified = true;
264
265
 
@@ -271,8 +272,7 @@ static void GetRowPointersInternal(DataChunk &keys, TupleDataChunkState &key_sta
271
272
  hashes_dense[i] = hashes_unified[uvf_index];
272
273
  }
273
274
  } else {
274
- hashes_v.Flatten(count);
275
- state.hashes_dense_v.Reference(hashes_v);
275
+ VectorOperations::Copy(hashes_v, state.hashes_dense_v, count, 0, 0);
276
276
  }
277
277
 
278
278
  // the number of keys that match for all iterations of the following loop
@@ -282,7 +282,6 @@ static void GetRowPointersInternal(DataChunk &keys, TupleDataChunkState &key_sta
282
282
  idx_t elements_to_probe_count = count;
283
283
 
284
284
  do {
285
-
286
285
  const idx_t keys_to_compare_count = ProbeForPointers<USE_SALTS>(state, ht, entries, hashes_v, pointers_result_v,
287
286
  row_sel, elements_to_probe_count, has_row_sel);
288
287
 
@@ -756,6 +755,10 @@ void JoinHashTable::AllocatePointerTable() {
756
755
  D_ASSERT(hash_map.GetSize() == capacity * sizeof(ht_entry_t));
757
756
 
758
757
  bitmask = capacity - 1;
758
+
759
+ DUCKDB_LOG(context, PhysicalOperatorLogType, op, "JoinHashTable", "Build",
760
+ {{"rows", to_string(data_collection->Count())},
761
+ {"size", to_string(data_collection->SizeInBytes() + hash_map.GetSize())}});
759
762
  }
760
763
 
761
764
  void JoinHashTable::InitializePointerTable(idx_t entry_idx_from, idx_t entry_idx_to) {
@@ -4,7 +4,6 @@
4
4
  #include "duckdb/execution/expression_executor.hpp"
5
5
  #include "duckdb/function/aggregate_function.hpp"
6
6
  #include "duckdb/parallel/thread_context.hpp"
7
- #include "duckdb/planner/expression/bound_reference_expression.hpp"
8
7
  #include "duckdb/planner/expression/bound_window_expression.hpp"
9
8
 
10
9
  namespace duckdb {
@@ -109,7 +108,7 @@ public:
109
108
 
110
109
  struct LeadLagState {
111
110
  // Fixed size
112
- static constexpr idx_t MAX_BUFFER = 2048U;
111
+ static constexpr int64_t MAX_BUFFER = 2048;
113
112
 
114
113
  static bool ComputeOffset(ClientContext &context, BoundWindowExpression &wexpr, int64_t &offset) {
115
114
  offset = 1;
@@ -132,7 +131,7 @@ public:
132
131
  if (wexpr.GetExpressionType() == ExpressionType::WINDOW_LEAD) {
133
132
  offset = -offset;
134
133
  }
135
- return idx_t(std::abs(offset)) < MAX_BUFFER;
134
+ return std::abs(offset) < MAX_BUFFER;
136
135
  }
137
136
 
138
137
  static bool ComputeDefault(ClientContext &context, BoundWindowExpression &wexpr, Value &result) {
@@ -413,6 +412,7 @@ void StreamingWindowState::AggregateState::Execute(ExecutionContext &context, Da
413
412
 
414
413
  // Compute the arguments
415
414
  auto &arg_chunk = aggr_state.arg_chunk;
415
+ arg_chunk.Reset();
416
416
  executor.Execute(input, arg_chunk);
417
417
  arg_chunk.Flatten();
418
418
 
@@ -8,7 +8,7 @@ namespace duckdb {
8
8
 
9
9
  void CSVEncoderBuffer::Initialize(idx_t encoded_size) {
10
10
  encoded_buffer_size = encoded_size;
11
- encoded_buffer = std::unique_ptr<char[]>(new char[encoded_size]);
11
+ encoded_buffer = duckdb::unique_ptr<char[]>(new char[encoded_size]);
12
12
  }
13
13
 
14
14
  char *CSVEncoderBuffer::Ptr() const {
@@ -15,6 +15,7 @@ void ColumnCountResult::AddValue(ColumnCountResult &result, idx_t buffer_pos) {
15
15
  inline void ColumnCountResult::InternalAddRow() {
16
16
  const idx_t column_count = current_column_count + 1;
17
17
  column_counts[result_position].number_of_columns = column_count;
18
+ column_counts[result_position].empty_lines = empty_lines;
18
19
  rows_per_column_count[column_count]++;
19
20
  current_column_count = 0;
20
21
  }
@@ -102,7 +103,7 @@ void ColumnCountResult::InvalidState(ColumnCountResult &result) {
102
103
  }
103
104
 
104
105
  bool ColumnCountResult::EmptyLine(ColumnCountResult &result, idx_t buffer_pos) {
105
- // nop
106
+ result.empty_lines++;
106
107
  return false;
107
108
  }
108
109
 
@@ -51,10 +51,7 @@ void SkipResult::InvalidState(SkipResult &result) {
51
51
  }
52
52
 
53
53
  bool SkipResult::EmptyLine(SkipResult &result, const idx_t buffer_pos) {
54
- if (result.state_machine.dialect_options.num_cols == 1) {
55
- return AddRow(result, buffer_pos);
56
- }
57
- return false;
54
+ return AddRow(result, buffer_pos);
58
55
  }
59
56
 
60
57
  SkipScanner::SkipScanner(shared_ptr<CSVBufferManager> buffer_manager, const shared_ptr<CSVStateMachine> &state_machine,
@@ -26,7 +26,9 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m
26
26
  : ScannerResult(states, state_machine, result_size_p),
27
27
  number_of_columns(NumericCast<uint32_t>(state_machine.dialect_options.num_cols)),
28
28
  null_padding(state_machine.options.null_padding), ignore_errors(state_machine.options.ignore_errors.GetValue()),
29
- extra_delimiter_bytes(state_machine.dialect_options.state_machine_options.delimiter.GetValue().size() - 1),
29
+ extra_delimiter_bytes(state_machine.dialect_options.state_machine_options.delimiter.GetValue().empty()
30
+ ? 0
31
+ : state_machine.dialect_options.state_machine_options.delimiter.GetValue().size() - 1),
30
32
  error_handler(error_hander_p), iterator(iterator_p), store_line_size(store_line_size_p),
31
33
  csv_file_scan(std::move(csv_file_scan_p)), lines_read(lines_read_p),
32
34
  current_errors(scan_id, state_machine.options.IgnoreErrors()), sniffing(sniffing_p), path(std::move(path_p)) {
@@ -565,6 +567,22 @@ void StringValueResult::AddPossiblyEscapedValue(StringValueResult &result, const
565
567
  result.cur_col_id++;
566
568
  result.chunk_col_id++;
567
569
  } else {
570
+ if (result.parse_chunk.data[result.chunk_col_id].GetType() != LogicalType::VARCHAR) {
571
+ // We cant have escapes on non varchar columns
572
+ result.current_errors.Insert(CAST_ERROR, result.cur_col_id, result.chunk_col_id,
573
+ result.last_position);
574
+ if (!result.state_machine.options.IgnoreErrors()) {
575
+ // We have to write the cast error message.
576
+ std::ostringstream error;
577
+ // Casting Error Message
578
+ error << "Could not convert string \"" << std::string(value_ptr, length) << "\" to \'"
579
+ << LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id) << "\'";
580
+ auto error_string = error.str();
581
+ FullLinePosition::SanitizeError(error_string);
582
+ result.current_errors.ModifyErrorMessageOfLastError(error_string);
583
+ }
584
+ return;
585
+ }
568
586
  auto value = StringValueScanner::RemoveEscape(
569
587
  value_ptr, length, result.state_machine.dialect_options.state_machine_options.escape.GetValue(),
570
588
  result.state_machine.dialect_options.state_machine_options.quote.GetValue(),
@@ -1446,6 +1464,23 @@ void StringValueScanner::ProcessOverBufferValue() {
1446
1464
  if (result.escaped) {
1447
1465
  if (!result.HandleTooManyColumnsError(over_buffer_string.c_str(), over_buffer_string.size())) {
1448
1466
  const auto str_ptr = over_buffer_string.c_str() + result.quoted_position;
1467
+ if (result.parse_chunk.data[result.chunk_col_id].GetType() != LogicalType::VARCHAR) {
1468
+ // We cant have escapes on non varchar columns
1469
+ result.current_errors.Insert(CAST_ERROR, result.cur_col_id, result.chunk_col_id,
1470
+ result.last_position);
1471
+ if (!result.state_machine.options.IgnoreErrors()) {
1472
+ // We have to write the cast error message.
1473
+ std::ostringstream error;
1474
+ // Casting Error Message
1475
+ error << "Could not convert string \""
1476
+ << std::string(over_buffer_string.c_str(), over_buffer_string.size()) << "\" to \'"
1477
+ << LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id) << "\'";
1478
+ auto error_string = error.str();
1479
+ FullLinePosition::SanitizeError(error_string);
1480
+ result.current_errors.ModifyErrorMessageOfLastError(error_string);
1481
+ }
1482
+ return;
1483
+ }
1449
1484
  value =
1450
1485
  RemoveEscape(str_ptr, over_buffer_string.size() - 2,
1451
1486
  state_machine->dialect_options.state_machine_options.escape.GetValue(),
@@ -1457,6 +1492,23 @@ void StringValueScanner::ProcessOverBufferValue() {
1457
1492
  } else {
1458
1493
  value = string_t(over_buffer_string.c_str(), UnsafeNumericCast<uint32_t>(over_buffer_string.size()));
1459
1494
  if (result.escaped) {
1495
+ if (result.parse_chunk.data[result.chunk_col_id].GetType() != LogicalType::VARCHAR) {
1496
+ // We cant have escapes on non varchar columns
1497
+ result.current_errors.Insert(CAST_ERROR, result.cur_col_id, result.chunk_col_id,
1498
+ result.last_position);
1499
+ if (!result.state_machine.options.IgnoreErrors()) {
1500
+ // We have to write the cast error message.
1501
+ std::ostringstream error;
1502
+ // Casting Error Message
1503
+ error << "Could not convert string \""
1504
+ << std::string(over_buffer_string.c_str(), over_buffer_string.size()) << "\" to \'"
1505
+ << LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id) << "\'";
1506
+ auto error_string = error.str();
1507
+ FullLinePosition::SanitizeError(error_string);
1508
+ result.current_errors.ModifyErrorMessageOfLastError(error_string);
1509
+ }
1510
+ return;
1511
+ }
1460
1512
  if (!result.HandleTooManyColumnsError(over_buffer_string.c_str(), over_buffer_string.size())) {
1461
1513
  value =
1462
1514
  RemoveEscape(over_buffer_string.c_str(), over_buffer_string.size(),