duckdb 0.7.2-dev1034.0 → 0.7.2-dev1146.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
  3. package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -1
  4. package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
  5. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
  6. package/src/duckdb/src/common/serializer/enum_serializer.cpp +6 -6
  7. package/src/duckdb/src/common/sort/comparators.cpp +14 -5
  8. package/src/duckdb/src/common/types/column_data_collection_segment.cpp +1 -4
  9. package/src/duckdb/src/common/types/interval.cpp +0 -41
  10. package/src/duckdb/src/common/types/list_segment.cpp +658 -0
  11. package/src/duckdb/src/common/types/string_heap.cpp +1 -1
  12. package/src/duckdb/src/common/types/string_type.cpp +1 -1
  13. package/src/duckdb/src/common/types/validity_mask.cpp +24 -7
  14. package/src/duckdb/src/common/types/vector.cpp +3 -7
  15. package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
  16. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
  17. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
  18. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  19. package/src/duckdb/src/execution/index/art/art.cpp +13 -0
  20. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
  21. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
  22. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +1 -0
  23. package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
  24. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +5 -1
  25. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +18 -5
  26. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
  27. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
  28. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
  29. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
  30. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +0 -4
  31. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +1 -0
  32. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
  33. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
  34. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +18 -10
  35. package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
  36. package/src/duckdb/src/function/cast_rules.cpp +9 -4
  37. package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
  38. package/src/duckdb/src/function/table/read_csv.cpp +5 -0
  39. package/src/duckdb/src/function/table/table_scan.cpp +8 -11
  40. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  41. package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
  42. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
  43. package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
  44. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
  45. package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
  46. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
  47. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +4 -1
  48. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -12
  49. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +4 -0
  50. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  51. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -0
  52. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -0
  53. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +0 -2
  54. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
  55. package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
  56. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +18 -7
  58. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -3
  59. package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
  60. package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +0 -1
  61. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +35 -43
  62. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +18 -5
  63. package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -4
  64. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -29
  65. package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +2 -3
  66. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +11 -1
  67. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
  68. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +4 -1
  69. package/src/duckdb/src/include/duckdb.h +21 -0
  70. package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
  71. package/src/duckdb/src/main/settings/settings.cpp +20 -8
  72. package/src/duckdb/src/optimizer/filter_combiner.cpp +2 -5
  73. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -0
  74. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +1 -0
  75. package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -3
  76. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +22 -0
  77. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
  78. package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
  79. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -1
  80. package/src/duckdb/src/storage/compression/numeric_constant.cpp +1 -1
  81. package/src/duckdb/src/storage/compression/rle.cpp +1 -0
  82. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
  83. package/src/duckdb/src/storage/data_table.cpp +3 -3
  84. package/src/duckdb/src/storage/local_storage.cpp +7 -0
  85. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  86. package/src/duckdb/src/storage/table/column_data.cpp +75 -18
  87. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -1
  88. package/src/duckdb/src/storage/table/column_segment.cpp +17 -31
  89. package/src/duckdb/src/storage/table/list_column_data.cpp +9 -12
  90. package/src/duckdb/src/storage/table/row_group.cpp +200 -136
  91. package/src/duckdb/src/storage/table/row_group_collection.cpp +75 -45
  92. package/src/duckdb/src/storage/table/scan_state.cpp +31 -38
  93. package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -6
  94. package/src/duckdb/src/storage/table/struct_column_data.cpp +11 -18
  95. package/src/duckdb/src/storage/table/update_segment.cpp +3 -0
  96. package/src/duckdb/ub_src_common_types.cpp +2 -0
@@ -68,24 +68,41 @@ void ValidityMask::Resize(idx_t old_size, idx_t new_size) {
68
68
  }
69
69
  }
70
70
 
71
- void ValidityMask::Slice(const ValidityMask &other, idx_t offset, idx_t end) {
71
+ void ValidityMask::Slice(const ValidityMask &other, idx_t source_offset, idx_t count) {
72
72
  if (other.AllValid()) {
73
73
  validity_mask = nullptr;
74
74
  validity_data.reset();
75
75
  return;
76
76
  }
77
- if (offset == 0) {
77
+ if (source_offset == 0) {
78
78
  Initialize(other);
79
79
  return;
80
80
  }
81
- ValidityMask new_mask(end - offset);
81
+ ValidityMask new_mask(count);
82
+ new_mask.SliceInPlace(other, 0, source_offset, count);
83
+ Initialize(new_mask);
84
+ }
82
85
 
83
- // FIXME THIS NEEDS FIXING!
86
+ bool ValidityMask::IsAligned(idx_t count) {
87
+ return count % BITS_PER_VALUE == 0;
88
+ }
89
+
90
+ void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset, idx_t source_offset, idx_t count) {
91
+ if (IsAligned(source_offset) && IsAligned(target_offset)) {
92
+ auto target_validity = GetData();
93
+ auto source_validity = other.GetData();
94
+ auto source_offset_entries = EntryCount(source_offset);
95
+ auto target_offset_entries = EntryCount(target_offset);
96
+ memcpy(target_validity + target_offset_entries, source_validity + source_offset_entries,
97
+ sizeof(validity_t) * EntryCount(count));
98
+ return;
99
+ }
100
+
101
+ // FIXME: use bitwise operations here
84
102
  #if 1
85
- for (idx_t i = offset; i < end; i++) {
86
- new_mask.Set(i - offset, other.RowIsValid(i));
103
+ for (idx_t i = 0; i < count; i++) {
104
+ Set(target_offset + i, other.RowIsValid(source_offset + i));
87
105
  }
88
- Initialize(new_mask);
89
106
  #else
90
107
  // first shift the "whole" units
91
108
  idx_t entire_units = offset / BITS_PER_VALUE;
@@ -136,17 +136,13 @@ void Vector::Slice(Vector &other, idx_t offset, idx_t end) {
136
136
  for (idx_t i = 0; i < entries.size(); i++) {
137
137
  entries[i]->Slice(*other_entries[i], offset, end);
138
138
  }
139
- if (offset > 0) {
140
- new_vector.validity.Slice(other.validity, offset, end);
141
- } else {
142
- new_vector.validity = other.validity;
143
- }
139
+ new_vector.validity.Slice(other.validity, offset, end - offset);
144
140
  Reference(new_vector);
145
141
  } else {
146
142
  Reference(other);
147
143
  if (offset > 0) {
148
144
  data = data + GetTypeIdSize(internal_type) * offset;
149
- validity.Slice(other.validity, offset, end);
145
+ validity.Slice(other.validity, offset, end - offset);
150
146
  }
151
147
  }
152
148
  }
@@ -1549,7 +1545,7 @@ string_t StringVector::AddStringOrBlob(Vector &vector, string_t data) {
1549
1545
 
1550
1546
  string_t StringVector::EmptyString(Vector &vector, idx_t len) {
1551
1547
  D_ASSERT(vector.GetType().InternalType() == PhysicalType::VARCHAR);
1552
- if (len < string_t::INLINE_LENGTH) {
1548
+ if (len <= string_t::INLINE_LENGTH) {
1553
1549
  return string_t(len);
1554
1550
  }
1555
1551
  if (!vector.auxiliary) {
@@ -65,33 +65,33 @@ inline bool ValuePositionComparator::Final<duckdb::NotEquals>(const Value &lhs,
65
65
  // Non-strict inequalities must use strict comparisons for Definite
66
66
  template <>
67
67
  bool ValuePositionComparator::Definite<duckdb::LessThanEquals>(const Value &lhs, const Value &rhs) {
68
- return ValueOperations::DistinctLessThan(lhs, rhs);
68
+ return !ValuePositionComparator::Definite<duckdb::GreaterThan>(lhs, rhs);
69
+ }
70
+
71
+ template <>
72
+ bool ValuePositionComparator::Final<duckdb::GreaterThan>(const Value &lhs, const Value &rhs) {
73
+ return ValueOperations::DistinctGreaterThan(lhs, rhs);
69
74
  }
70
75
 
71
76
  template <>
72
77
  bool ValuePositionComparator::Final<duckdb::LessThanEquals>(const Value &lhs, const Value &rhs) {
73
- return ValueOperations::DistinctLessThanEquals(lhs, rhs);
78
+ return !ValuePositionComparator::Final<duckdb::GreaterThan>(lhs, rhs);
74
79
  }
75
80
 
76
81
  template <>
77
82
  bool ValuePositionComparator::Definite<duckdb::GreaterThanEquals>(const Value &lhs, const Value &rhs) {
78
- return ValueOperations::DistinctGreaterThan(lhs, rhs);
83
+ return !ValuePositionComparator::Definite<duckdb::GreaterThan>(rhs, lhs);
79
84
  }
80
85
 
81
86
  template <>
82
87
  bool ValuePositionComparator::Final<duckdb::GreaterThanEquals>(const Value &lhs, const Value &rhs) {
83
- return ValueOperations::DistinctGreaterThanEquals(lhs, rhs);
88
+ return !ValuePositionComparator::Final<duckdb::GreaterThan>(rhs, lhs);
84
89
  }
85
90
 
86
91
  // Strict inequalities just use strict for both Definite and Final
87
92
  template <>
88
93
  bool ValuePositionComparator::Final<duckdb::LessThan>(const Value &lhs, const Value &rhs) {
89
- return ValueOperations::DistinctLessThan(lhs, rhs);
90
- }
91
-
92
- template <>
93
- bool ValuePositionComparator::Final<duckdb::GreaterThan>(const Value &lhs, const Value &rhs) {
94
- return ValueOperations::DistinctGreaterThan(lhs, rhs);
94
+ return ValuePositionComparator::Final<duckdb::GreaterThan>(rhs, lhs);
95
95
  }
96
96
 
97
97
  template <class OP>
@@ -194,10 +194,7 @@ bool ValueOperations::GreaterThan(const Value &left, const Value &right) {
194
194
  }
195
195
 
196
196
  bool ValueOperations::GreaterThanEquals(const Value &left, const Value &right) {
197
- if (left.IsNull() || right.IsNull()) {
198
- throw InternalException("Comparison on NULL values");
199
- }
200
- return TemplatedBooleanOperation<duckdb::GreaterThanEquals>(left, right);
197
+ return !ValueOperations::GreaterThan(right, left);
201
198
  }
202
199
 
203
200
  bool ValueOperations::LessThan(const Value &left, const Value &right) {
@@ -205,7 +202,7 @@ bool ValueOperations::LessThan(const Value &left, const Value &right) {
205
202
  }
206
203
 
207
204
  bool ValueOperations::LessThanEquals(const Value &left, const Value &right) {
208
- return ValueOperations::GreaterThanEquals(right, left);
205
+ return !ValueOperations::GreaterThan(left, right);
209
206
  }
210
207
 
211
208
  bool ValueOperations::NotDistinctFrom(const Value &left, const Value &right) {
@@ -234,12 +231,7 @@ bool ValueOperations::DistinctGreaterThan(const Value &left, const Value &right)
234
231
  }
235
232
 
236
233
  bool ValueOperations::DistinctGreaterThanEquals(const Value &left, const Value &right) {
237
- if (left.IsNull()) {
238
- return true;
239
- } else if (right.IsNull()) {
240
- return false;
241
- }
242
- return TemplatedBooleanOperation<duckdb::GreaterThanEquals>(left, right);
234
+ return !ValueOperations::DistinctGreaterThan(right, left);
243
235
  }
244
236
 
245
237
  bool ValueOperations::DistinctLessThan(const Value &left, const Value &right) {
@@ -247,7 +239,7 @@ bool ValueOperations::DistinctLessThan(const Value &left, const Value &right) {
247
239
  }
248
240
 
249
241
  bool ValueOperations::DistinctLessThanEquals(const Value &left, const Value &right) {
250
- return ValueOperations::DistinctGreaterThanEquals(right, left);
242
+ return !ValueOperations::DistinctGreaterThan(left, right);
251
243
  }
252
244
 
253
245
  } // namespace duckdb
@@ -22,12 +22,12 @@ bool EqualsFloat(T left, T right) {
22
22
  }
23
23
 
24
24
  template <>
25
- bool Equals::Operation(float left, float right) {
25
+ bool Equals::Operation(const float &left, const float &right) {
26
26
  return EqualsFloat<float>(left, right);
27
27
  }
28
28
 
29
29
  template <>
30
- bool Equals::Operation(double left, double right) {
30
+ bool Equals::Operation(const double &left, const double &right) {
31
31
  return EqualsFloat<double>(left, right);
32
32
  }
33
33
 
@@ -49,12 +49,12 @@ bool GreaterThanFloat(T left, T right) {
49
49
  }
50
50
 
51
51
  template <>
52
- bool GreaterThan::Operation(float left, float right) {
52
+ bool GreaterThan::Operation(const float &left, const float &right) {
53
53
  return GreaterThanFloat<float>(left, right);
54
54
  }
55
55
 
56
56
  template <>
57
- bool GreaterThan::Operation(double left, double right) {
57
+ bool GreaterThan::Operation(const double &left, const double &right) {
58
58
  return GreaterThanFloat<double>(left, right);
59
59
  }
60
60
 
@@ -77,12 +77,12 @@ bool GreaterThanEqualsFloat(T left, T right) {
77
77
  }
78
78
 
79
79
  template <>
80
- bool GreaterThanEquals::Operation(float left, float right) {
80
+ bool GreaterThanEquals::Operation(const float &left, const float &right) {
81
81
  return GreaterThanEqualsFloat<float>(left, right);
82
82
  }
83
83
 
84
84
  template <>
85
- bool GreaterThanEquals::Operation(double left, double right) {
85
+ bool GreaterThanEquals::Operation(const double &left, const double &right) {
86
86
  return GreaterThanEqualsFloat<double>(left, right);
87
87
  }
88
88
 
@@ -127,14 +127,14 @@ template <>
127
127
  inline idx_t ComparisonSelector::Select<duckdb::LessThan>(Vector &left, Vector &right, const SelectionVector *sel,
128
128
  idx_t count, SelectionVector *true_sel,
129
129
  SelectionVector *false_sel) {
130
- return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel);
130
+ return VectorOperations::GreaterThan(right, left, sel, count, true_sel, false_sel);
131
131
  }
132
132
 
133
133
  template <>
134
134
  inline idx_t ComparisonSelector::Select<duckdb::LessThanEquals>(Vector &left, Vector &right, const SelectionVector *sel,
135
135
  idx_t count, SelectionVector *true_sel,
136
136
  SelectionVector *false_sel) {
137
- return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel);
137
+ return VectorOperations::GreaterThanEquals(right, left, sel, count, true_sel, false_sel);
138
138
  }
139
139
 
140
140
  static void ComparesNotNull(UnifiedVectorFormat &ldata, UnifiedVectorFormat &rdata, ValidityMask &vresult,
@@ -272,7 +272,7 @@ void VectorOperations::GreaterThanEquals(Vector &left, Vector &right, Vector &re
272
272
  }
273
273
 
274
274
  void VectorOperations::LessThanEquals(Vector &left, Vector &right, Vector &result, idx_t count) {
275
- ComparisonExecutor::Execute<duckdb::LessThanEquals>(left, right, result, count);
275
+ ComparisonExecutor::Execute<duckdb::GreaterThanEquals>(right, left, result, count);
276
276
  }
277
277
 
278
278
  void VectorOperations::GreaterThan(Vector &left, Vector &right, Vector &result, idx_t count) {
@@ -280,7 +280,7 @@ void VectorOperations::GreaterThan(Vector &left, Vector &right, Vector &result,
280
280
  }
281
281
 
282
282
  void VectorOperations::LessThan(Vector &left, Vector &right, Vector &result, idx_t count) {
283
- ComparisonExecutor::Execute<duckdb::LessThan>(left, right, result, count);
283
+ ComparisonExecutor::Execute<duckdb::GreaterThan>(right, left, result, count);
284
284
  }
285
285
 
286
286
  } // namespace duckdb
@@ -401,14 +401,14 @@ idx_t PositionComparator::Definite<duckdb::DistinctLessThanEquals>(Vector &left,
401
401
  const SelectionVector &sel, idx_t count,
402
402
  SelectionVector *true_sel,
403
403
  SelectionVector &false_sel) {
404
- return VectorOperations::DistinctLessThan(left, right, &sel, count, true_sel, &false_sel);
404
+ return VectorOperations::DistinctGreaterThan(right, left, &sel, count, true_sel, &false_sel);
405
405
  }
406
406
 
407
407
  template <>
408
408
  idx_t PositionComparator::Final<duckdb::DistinctLessThanEquals>(Vector &left, Vector &right, const SelectionVector &sel,
409
409
  idx_t count, SelectionVector *true_sel,
410
410
  SelectionVector *false_sel) {
411
- return VectorOperations::DistinctLessThanEquals(left, right, &sel, count, true_sel, false_sel);
411
+ return VectorOperations::DistinctGreaterThanEquals(right, left, &sel, count, true_sel, false_sel);
412
412
  }
413
413
 
414
414
  template <>
@@ -432,7 +432,7 @@ template <>
432
432
  idx_t PositionComparator::Final<duckdb::DistinctLessThan>(Vector &left, Vector &right, const SelectionVector &sel,
433
433
  idx_t count, SelectionVector *true_sel,
434
434
  SelectionVector *false_sel) {
435
- return VectorOperations::DistinctLessThan(left, right, &sel, count, true_sel, false_sel);
435
+ return VectorOperations::DistinctGreaterThan(right, left, &sel, count, true_sel, false_sel);
436
436
  }
437
437
 
438
438
  template <>
@@ -869,7 +869,7 @@ idx_t VectorOperations::DistinctFrom(Vector &left, Vector &right, const Selectio
869
869
  // true := A == B with nulls being equal
870
870
  idx_t VectorOperations::NotDistinctFrom(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
871
871
  SelectionVector *true_sel, SelectionVector *false_sel) {
872
- return TemplatedDistinctSelectOperation<duckdb::NotDistinctFrom>(left, right, sel, count, true_sel, false_sel);
872
+ return count - TemplatedDistinctSelectOperation<duckdb::DistinctFrom>(left, right, sel, count, false_sel, true_sel);
873
873
  }
874
874
 
875
875
  // true := A > B with nulls being maximal
@@ -888,13 +888,13 @@ idx_t VectorOperations::DistinctGreaterThanNullsFirst(Vector &left, Vector &righ
888
888
  // true := A >= B with nulls being maximal
889
889
  idx_t VectorOperations::DistinctGreaterThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
890
890
  SelectionVector *true_sel, SelectionVector *false_sel) {
891
- return TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThanEquals>(left, right, sel, count, true_sel,
892
- false_sel);
891
+ return count -
892
+ TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThan>(right, left, sel, count, false_sel, true_sel);
893
893
  }
894
894
  // true := A < B with nulls being maximal
895
895
  idx_t VectorOperations::DistinctLessThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
896
896
  SelectionVector *true_sel, SelectionVector *false_sel) {
897
- return TemplatedDistinctSelectOperation<duckdb::DistinctLessThan>(left, right, sel, count, true_sel, false_sel);
897
+ return TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThan>(right, left, sel, count, true_sel, false_sel);
898
898
  }
899
899
 
900
900
  // true := A < B with nulls being minimal
@@ -907,8 +907,8 @@ idx_t VectorOperations::DistinctLessThanNullsFirst(Vector &left, Vector &right,
907
907
  // true := A <= B with nulls being maximal
908
908
  idx_t VectorOperations::DistinctLessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
909
909
  SelectionVector *true_sel, SelectionVector *false_sel) {
910
- return TemplatedDistinctSelectOperation<duckdb::DistinctLessThanEquals>(left, right, sel, count, true_sel,
911
- false_sel);
910
+ return TemplatedDistinctSelectOperation<duckdb::DistinctGreaterThanEquals>(right, left, sel, count, true_sel,
911
+ false_sel);
912
912
  }
913
913
 
914
914
  // true := A != B with nulls being equal, inputs selected
@@ -919,7 +919,8 @@ idx_t VectorOperations::NestedNotEquals(Vector &left, Vector &right, const Selec
919
919
  // true := A == B with nulls being equal, inputs selected
920
920
  idx_t VectorOperations::NestedEquals(Vector &left, Vector &right, const SelectionVector &sel, idx_t count,
921
921
  SelectionVector *true_sel, SelectionVector *false_sel) {
922
- return TemplatedDistinctSelectOperation<duckdb::NotDistinctFrom>(left, right, &sel, count, true_sel, false_sel);
922
+ return count -
923
+ TemplatedDistinctSelectOperation<duckdb::DistinctFrom>(left, right, &sel, count, false_sel, true_sel);
923
924
  }
924
925
 
925
926
  } // namespace duckdb
@@ -266,12 +266,12 @@ idx_t VectorOperations::GreaterThanEquals(Vector &left, Vector &right, const Sel
266
266
 
267
267
  idx_t VectorOperations::LessThan(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
268
268
  SelectionVector *true_sel, SelectionVector *false_sel) {
269
- return TemplatedSelectOperation<duckdb::LessThan>(left, right, sel, count, true_sel, false_sel);
269
+ return TemplatedSelectOperation<duckdb::GreaterThan>(right, left, sel, count, true_sel, false_sel);
270
270
  }
271
271
 
272
272
  idx_t VectorOperations::LessThanEquals(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
273
273
  SelectionVector *true_sel, SelectionVector *false_sel) {
274
- return TemplatedSelectOperation<duckdb::LessThanEquals>(left, right, sel, count, true_sel, false_sel);
274
+ return TemplatedSelectOperation<duckdb::GreaterThanEquals>(right, left, sel, count, true_sel, false_sel);
275
275
  }
276
276
 
277
277
  idx_t ExpressionExecutor::Select(const BoundComparisonExpression &expr, ExpressionState *state,
@@ -6,6 +6,7 @@
6
6
  #include "duckdb/storage/arena_allocator.hpp"
7
7
  #include "duckdb/execution/index/art/art_key.hpp"
8
8
  #include "duckdb/common/types/conflict_manager.hpp"
9
+ #include "duckdb/storage/table/scan_state.hpp"
9
10
 
10
11
  #include <algorithm>
11
12
  #include <cstring>
@@ -13,6 +14,18 @@
13
14
 
14
15
  namespace duckdb {
15
16
 
17
+ struct ARTIndexScanState : public IndexScanState {
18
+
19
+ //! Scan predicates (single predicate scan or range scan)
20
+ Value values[2];
21
+ //! Expressions of the scan predicates
22
+ ExpressionType expressions[2];
23
+ bool checked = false;
24
+ //! All scanned row IDs
25
+ vector<row_t> result_ids;
26
+ Iterator iterator;
27
+ };
28
+
16
29
  ART::ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
17
30
  const vector<unique_ptr<Expression>> &unbound_expressions, IndexConstraintType constraint_type,
18
31
  AttachedDatabase &db, bool track_memory, idx_t block_id, idx_t block_offset)
@@ -3,13 +3,13 @@
3
3
  #include "duckdb/catalog/catalog_entry/aggregate_function_catalog_entry.hpp"
4
4
  #include "duckdb/common/vector_operations/vector_operations.hpp"
5
5
  #include "duckdb/execution/aggregate_hashtable.hpp"
6
- #include "duckdb/execution/partitionable_hashtable.hpp"
7
6
  #include "duckdb/main/client_context.hpp"
8
7
  #include "duckdb/parallel/pipeline.hpp"
9
8
  #include "duckdb/parallel/task_scheduler.hpp"
10
9
  #include "duckdb/parallel/thread_context.hpp"
11
10
  #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
12
11
  #include "duckdb/planner/expression/bound_constant_expression.hpp"
12
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
13
13
  #include "duckdb/parallel/base_pipeline_event.hpp"
14
14
  #include "duckdb/common/atomic.hpp"
15
15
  #include "duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp"
@@ -1,5 +1,7 @@
1
1
  #include "duckdb/execution/operator/join/physical_hash_join.hpp"
2
2
 
3
+ #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
4
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
3
5
  #include "duckdb/common/types/column_data_collection.hpp"
4
6
  #include "duckdb/common/vector_operations/vector_operations.hpp"
5
7
  #include "duckdb/execution/expression_executor.hpp"
@@ -12,6 +12,7 @@
12
12
  #include "duckdb/storage/table/append_state.hpp"
13
13
  #include "duckdb/transaction/duck_transaction.hpp"
14
14
  #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
15
+ #include "duckdb/storage/table/scan_state.hpp"
15
16
 
16
17
  namespace duckdb {
17
18
 
@@ -61,9 +61,6 @@ void PhysicalJoin::BuildJoinPipelines(Pipeline &current, MetaPipeline &meta_pipe
61
61
  bool add_child_pipeline = false;
62
62
  auto &join_op = (PhysicalJoin &)op;
63
63
  if (IsRightOuterJoin(join_op.join_type)) {
64
- if (meta_pipeline.HasRecursiveCTE()) {
65
- throw NotImplementedException("FULL and RIGHT outer joins are not supported in recursive CTEs yet");
66
- }
67
64
  add_child_pipeline = true;
68
65
  }
69
66
 
@@ -320,7 +320,11 @@ bool BaseCSVReader::AddRow(DataChunk &insert_chunk, idx_t &column, string &error
320
320
  }
321
321
 
322
322
  if (column < return_types.size() && mode != ParserMode::SNIFFING_DIALECT) {
323
- if (options.ignore_errors) {
323
+ if (options.null_padding) {
324
+ for (; column < return_types.size(); column++) {
325
+ FlatVector::SetNull(parse_chunk.data[column], parse_chunk.size(), true);
326
+ }
327
+ } else if (options.ignore_errors) {
324
328
  column = 0;
325
329
  return false;
326
330
  } else {
@@ -419,6 +419,7 @@ void BufferedCSVReader::DetectDialect(const vector<LogicalType> &requested_types
419
419
  }
420
420
 
421
421
  idx_t best_consistent_rows = 0;
422
+ idx_t prev_padding_count = 0;
422
423
  for (auto quoterule : quoterule_candidates) {
423
424
  const auto &quote_candidates = quote_candidates_map[static_cast<uint8_t>(quoterule)];
424
425
  for (const auto &quote : quote_candidates) {
@@ -441,20 +442,29 @@ void BufferedCSVReader::DetectDialect(const vector<LogicalType> &requested_types
441
442
 
442
443
  idx_t start_row = original_options.skip_rows;
443
444
  idx_t consistent_rows = 0;
444
- idx_t num_cols = 0;
445
-
445
+ idx_t num_cols = sniffed_column_counts.empty() ? 0 : sniffed_column_counts[0];
446
+ idx_t padding_count = 0;
447
+ bool allow_padding = original_options.null_padding;
446
448
  for (idx_t row = 0; row < sniffed_column_counts.size(); row++) {
447
449
  if (sniffed_column_counts[row] == num_cols) {
448
450
  consistent_rows++;
449
- } else {
451
+ } else if (num_cols < sniffed_column_counts[row] && !original_options.skip_rows_set) {
452
+ // we use the maximum amount of num_cols that we find
450
453
  num_cols = sniffed_column_counts[row];
451
454
  start_row = row + original_options.skip_rows;
452
455
  consistent_rows = 1;
456
+ padding_count = 0;
457
+ } else if (num_cols >= sniffed_column_counts[row] && allow_padding) {
458
+ // we are missing some columns, we can parse this as long as we add padding
459
+ padding_count++;
453
460
  }
454
461
  }
455
462
 
456
463
  // some logic
464
+ consistent_rows += padding_count;
457
465
  bool more_values = (consistent_rows > best_consistent_rows && num_cols >= best_num_cols);
466
+ bool require_more_padding = padding_count > prev_padding_count;
467
+ bool require_less_padding = padding_count < prev_padding_count;
458
468
  bool single_column_before = best_num_cols < 2 && num_cols > best_num_cols;
459
469
  bool rows_consistent =
460
470
  start_row + consistent_rows - original_options.skip_rows == sniffed_column_counts.size();
@@ -464,16 +474,19 @@ void BufferedCSVReader::DetectDialect(const vector<LogicalType> &requested_types
464
474
 
465
475
  if (!requested_types.empty() && requested_types.size() != num_cols) {
466
476
  continue;
467
- } else if ((more_values || single_column_before) && rows_consistent) {
477
+ } else if (rows_consistent && (single_column_before || (more_values && !require_more_padding) ||
478
+ (more_than_one_column && require_less_padding))) {
468
479
  sniff_info.skip_rows = start_row;
469
480
  sniff_info.num_cols = num_cols;
470
481
  sniff_info.new_line = options.new_line;
471
482
  best_consistent_rows = consistent_rows;
472
483
  best_num_cols = num_cols;
484
+ prev_padding_count = padding_count;
473
485
 
474
486
  info_candidates.clear();
475
487
  info_candidates.push_back(sniff_info);
476
- } else if (more_than_one_row && more_than_one_column && start_good && rows_consistent) {
488
+ } else if (more_than_one_row && more_than_one_column && start_good && rows_consistent &&
489
+ !require_more_padding) {
477
490
  bool same_quote_is_candidate = false;
478
491
  for (auto &info_candidate : info_candidates) {
479
492
  if (quote.compare(info_candidate.quote) == 0) {
@@ -145,6 +145,7 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
145
145
  }
146
146
  } else if (loption == "skip") {
147
147
  skip_rows = ParseInteger(value, loption);
148
+ skip_rows_set = true;
148
149
  } else if (loption == "max_line_size" || loption == "maximum_line_size") {
149
150
  maximum_line_size = ParseInteger(value, loption);
150
151
  } else if (loption == "sample_chunk_size") {
@@ -183,6 +184,8 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
183
184
  if (decimal_separator != "." && decimal_separator != ",") {
184
185
  throw BinderException("Unsupported parameter for DECIMAL_SEPARATOR: should be '.' or ','");
185
186
  }
187
+ } else if (loption == "null_padding") {
188
+ null_padding = ParseBoolean(value, loption);
186
189
  } else {
187
190
  throw BinderException("Unrecognized option for CSV reader \"%s\"", loption);
188
191
  }
@@ -1,12 +1,13 @@
1
1
  #include "duckdb/execution/operator/persistent/physical_batch_insert.hpp"
2
2
 
3
3
  #include "duckdb/parallel/thread_context.hpp"
4
- #include "duckdb/parser/parsed_data/create_table_info.hpp"
5
4
  #include "duckdb/storage/data_table.hpp"
6
5
  #include "duckdb/storage/table/row_group_collection.hpp"
7
6
  #include "duckdb/storage/table_io_manager.hpp"
8
7
  #include "duckdb/transaction/local_storage.hpp"
9
8
  #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
9
+ #include "duckdb/storage/table/append_state.hpp"
10
+ #include "duckdb/storage/table/scan_state.hpp"
10
11
 
11
12
  namespace duckdb {
12
13
 
@@ -2,11 +2,9 @@
2
2
 
3
3
  #include "duckdb/execution/expression_executor.hpp"
4
4
  #include "duckdb/storage/data_table.hpp"
5
- #include "duckdb/transaction/transaction.hpp"
6
5
  #include "duckdb/transaction/duck_transaction.hpp"
7
6
  #include "duckdb/common/types/column_data_collection.hpp"
8
-
9
- #include "duckdb/common/atomic.hpp"
7
+ #include "duckdb/storage/table/scan_state.hpp"
10
8
 
11
9
  namespace duckdb {
12
10
 
@@ -16,6 +16,7 @@
16
16
  #include "duckdb/common/types/conflict_manager.hpp"
17
17
  #include "duckdb/execution/index/art/art.hpp"
18
18
  #include "duckdb/transaction/duck_transaction.hpp"
19
+ #include "duckdb/storage/table/append_state.hpp"
19
20
 
20
21
  namespace duckdb {
21
22
 
@@ -180,10 +180,6 @@ void PhysicalRecursiveCTE::BuildPipelines(Pipeline &current, MetaPipeline &meta_
180
180
  auto &executor = meta_pipeline.GetExecutor();
181
181
  executor.AddRecursiveCTE(this);
182
182
 
183
- if (meta_pipeline.HasRecursiveCTE()) {
184
- throw InternalException("Recursive CTE detected WITHIN a recursive CTE node");
185
- }
186
-
187
183
  // the LHS of the recursive CTE is our initial state
188
184
  auto initial_state_pipeline = meta_pipeline.CreateChildMetaPipeline(current, this);
189
185
  initial_state_pipeline->Build(children[0].get());
@@ -10,6 +10,7 @@
10
10
  #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
11
11
  #include "duckdb/planner/operator/logical_aggregate.hpp"
12
12
  #include "duckdb/function/function_binder.hpp"
13
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
13
14
 
14
15
  namespace duckdb {
15
16
 
@@ -10,10 +10,10 @@
10
10
  #include "duckdb/function/table/table_scan.hpp"
11
11
  #include "duckdb/main/client_context.hpp"
12
12
  #include "duckdb/planner/operator/logical_comparison_join.hpp"
13
-
14
13
  #include "duckdb/transaction/duck_transaction.hpp"
15
14
  #include "duckdb/common/operator/subtract.hpp"
16
15
  #include "duckdb/execution/operator/join/physical_blockwise_nl_join.hpp"
16
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
17
17
  #include "duckdb/planner/expression_iterator.hpp"
18
18
  #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp"
19
19
 
@@ -6,8 +6,9 @@
6
6
  #include "duckdb/execution/operator/order/physical_order.hpp"
7
7
  #include "duckdb/execution/physical_plan_generator.hpp"
8
8
  #include "duckdb/function/table/table_scan.hpp"
9
- #include "duckdb/planner/filter/null_filter.hpp"
10
9
  #include "duckdb/planner/operator/logical_create_index.hpp"
10
+ #include "duckdb/planner/expression/bound_operator_expression.hpp"
11
+ #include "duckdb/planner/expression/bound_reference_expression.hpp"
11
12
  #include "duckdb/planner/table_filter.hpp"
12
13
 
13
14
  namespace duckdb {