duckdb 0.7.2-dev904.0 → 0.7.2-dev982.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/src/common/enums/logical_operator_type.cpp +2 -0
  3. package/src/duckdb/src/common/serializer/enum_serializer.cpp +4 -0
  4. package/src/duckdb/src/common/types/value.cpp +46 -0
  5. package/src/duckdb/src/execution/column_binding_resolver.cpp +15 -5
  6. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +40 -19
  7. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +2 -0
  8. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +3 -3
  9. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +5 -13
  10. package/src/duckdb/src/execution/operator/projection/physical_projection.cpp +34 -0
  11. package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +97 -0
  12. package/src/duckdb/src/execution/physical_plan_generator.cpp +3 -0
  13. package/src/duckdb/src/function/scalar/math/numeric.cpp +87 -0
  14. package/src/duckdb/src/function/scalar/math_functions.cpp +3 -0
  15. package/src/duckdb/src/function/scalar/string/hex.cpp +201 -0
  16. package/src/duckdb/src/function/scalar/string_functions.cpp +1 -0
  17. package/src/duckdb/src/function/table/read_csv.cpp +46 -0
  18. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  19. package/src/duckdb/src/include/duckdb/common/enums/joinref_type.hpp +5 -4
  20. package/src/duckdb/src/include/duckdb/common/enums/logical_operator_type.hpp +1 -0
  21. package/src/duckdb/src/include/duckdb/common/string_util.hpp +13 -0
  22. package/src/duckdb/src/include/duckdb/common/types/value.hpp +11 -7
  23. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +2 -2
  24. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_cross_product.hpp +2 -0
  25. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +6 -0
  26. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_projection.hpp +5 -0
  27. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +1 -0
  28. package/src/duckdb/src/include/duckdb/function/scalar/math_functions.hpp +8 -0
  29. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +4 -0
  30. package/src/duckdb/src/include/duckdb/planner/logical_tokens.hpp +1 -0
  31. package/src/duckdb/src/include/duckdb/planner/operator/list.hpp +1 -0
  32. package/src/duckdb/src/include/duckdb/planner/operator/logical_asof_join.hpp +22 -0
  33. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +5 -2
  34. package/src/duckdb/src/include/duckdb.h +1 -1
  35. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  36. package/src/duckdb/src/optimizer/filter_pullup.cpp +3 -1
  37. package/src/duckdb/src/optimizer/filter_pushdown.cpp +3 -1
  38. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +4 -0
  39. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +8 -4
  40. package/src/duckdb/src/optimizer/pullup/pullup_from_left.cpp +2 -2
  41. package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -1
  42. package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +3 -0
  43. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +4 -2
  44. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +1 -1
  45. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +1 -0
  46. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
  47. package/src/duckdb/src/optimizer/statistics_propagator.cpp +1 -0
  48. package/src/duckdb/src/parser/tableref/joinref.cpp +4 -0
  49. package/src/duckdb/src/parser/transform/tableref/transform_join.cpp +8 -1
  50. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +10 -3
  51. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +60 -12
  52. package/src/duckdb/src/planner/logical_operator.cpp +3 -0
  53. package/src/duckdb/src/planner/logical_operator_visitor.cpp +1 -0
  54. package/src/duckdb/src/planner/operator/logical_asof_join.cpp +8 -0
  55. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +3 -1
  56. package/src/duckdb/third_party/libpg_query/include/nodes/nodes.hpp +32 -0
  57. package/src/duckdb/third_party/libpg_query/include/nodes/primnodes.hpp +3 -3
  58. package/src/duckdb/third_party/libpg_query/include/parser/gram.hpp +915 -913
  59. package/src/duckdb/third_party/libpg_query/include/parser/kwlist.hpp +1 -0
  60. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +17371 -17306
  61. package/src/duckdb/ub_src_execution_physical_plan.cpp +2 -0
  62. package/src/duckdb/ub_src_function_scalar_string.cpp +2 -0
  63. package/src/duckdb/ub_src_planner_operator.cpp +2 -0
@@ -0,0 +1,201 @@
1
+ #include "duckdb/common/exception.hpp"
2
+ #include "duckdb/common/string_util.hpp"
3
+ #include "duckdb/common/types/blob.hpp"
4
+ #include "duckdb/common/vector_operations/unary_executor.hpp"
5
+ #include "duckdb/common/vector_operations/vector_operations.hpp"
6
+ #include "duckdb/function/scalar/string_functions.hpp"
7
+
8
+ namespace duckdb {
9
+
10
+ struct HexStrOperator {
11
+ template <class INPUT_TYPE, class RESULT_TYPE>
12
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
13
+ auto data = input.GetDataUnsafe();
14
+ auto size = input.GetSize();
15
+
16
+ // Allocate empty space
17
+ auto target = StringVector::EmptyString(result, size * 2);
18
+ auto output = target.GetDataWriteable();
19
+
20
+ for (idx_t i = 0; i < size; ++i) {
21
+ *output = Blob::HEX_TABLE[(data[i] >> 4) & 0x0F];
22
+ output++;
23
+ *output = Blob::HEX_TABLE[data[i] & 0x0F];
24
+ output++;
25
+ }
26
+
27
+ target.Finalize();
28
+ return target;
29
+ }
30
+ };
31
+
32
+ struct FromHexOperator {
33
+ template <class INPUT_TYPE, class RESULT_TYPE>
34
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
35
+ auto data = input.GetDataUnsafe();
36
+ auto size = input.GetSize();
37
+
38
+ if (size > NumericLimits<uint32_t>::Maximum()) {
39
+ throw InvalidInputException("Hexadecimal input length larger than 2^32 are not supported");
40
+ }
41
+
42
+ D_ASSERT(size <= NumericLimits<uint32_t>::Maximum());
43
+ auto buffer_size = (size + 1) / 2;
44
+
45
+ // Allocate empty space
46
+ auto target = StringVector::EmptyString(result, buffer_size);
47
+ auto output = target.GetDataWriteable();
48
+
49
+ // Treated as a single byte
50
+ idx_t i = 0;
51
+ if (size % 2 != 0) {
52
+ *output = StringUtil::GetHexValue(data[i]);
53
+ i++;
54
+ output++;
55
+ }
56
+
57
+ for (; i < size; i += 2) {
58
+ uint8_t major = StringUtil::GetHexValue(data[i]);
59
+ uint8_t minor = StringUtil::GetHexValue(data[i + 1]);
60
+ *output = (major << 4) | minor;
61
+ output++;
62
+ }
63
+
64
+ target.Finalize();
65
+ return target;
66
+ }
67
+ };
68
+
69
+ struct HexIntegralOperator {
70
+ template <class INPUT_TYPE, class RESULT_TYPE>
71
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
72
+ // Sufficient space for maximum length
73
+ char buffer[sizeof(INPUT_TYPE) * 2];
74
+ char *ptr = buffer;
75
+ idx_t buffer_size = 0;
76
+
77
+ bool seen_non_zero = false;
78
+ for (idx_t offset = sizeof(INPUT_TYPE) * 8; offset >= 4; offset -= 4) {
79
+ uint8_t byte = (input >> (offset - 4)) & 0x0F;
80
+ if (byte == 0 && !seen_non_zero && offset > 4) {
81
+ continue;
82
+ }
83
+ seen_non_zero = true;
84
+ *ptr = Blob::HEX_TABLE[byte];
85
+ ptr++;
86
+ buffer_size++;
87
+ }
88
+
89
+ // Allocate empty space
90
+ auto target = StringVector::EmptyString(result, buffer_size);
91
+ auto output = target.GetDataWriteable();
92
+ memcpy(output, buffer, buffer_size);
93
+
94
+ target.Finalize();
95
+ return target;
96
+ }
97
+ };
98
+
99
+ struct HexHugeIntOperator {
100
+ template <class INPUT_TYPE, class RESULT_TYPE>
101
+ static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
102
+ char buffer[sizeof(INPUT_TYPE) * 2];
103
+ char *ptr = buffer;
104
+ idx_t buffer_size = 0;
105
+
106
+ uint64_t lower = input.lower;
107
+ int64_t upper = input.upper;
108
+
109
+ bool seen_non_zero = false;
110
+ for (idx_t offset = 64; offset >= 4; offset -= 4) {
111
+ uint8_t byte = (upper >> (offset - 4)) & 0x0F;
112
+
113
+ if (byte == 0 && !seen_non_zero) {
114
+ continue;
115
+ }
116
+ seen_non_zero = true;
117
+ *ptr = Blob::HEX_TABLE[byte];
118
+ ptr++;
119
+ buffer_size++;
120
+ }
121
+
122
+ for (idx_t offset = 64; offset >= 4; offset -= 4) {
123
+ uint8_t byte = (lower >> (offset - 4)) & 0x0F;
124
+
125
+ // at least one byte space
126
+ if (byte == 0 && !seen_non_zero && offset > 4) {
127
+ continue;
128
+ }
129
+ seen_non_zero = true;
130
+ *ptr = Blob::HEX_TABLE[byte];
131
+ ptr++;
132
+ buffer_size++;
133
+ }
134
+
135
+ // Allocate empty space
136
+ auto target = StringVector::EmptyString(result, buffer_size);
137
+ auto output = target.GetDataWriteable();
138
+ memcpy(output, buffer, buffer_size);
139
+
140
+ target.Finalize();
141
+ return target;
142
+ }
143
+ };
144
+
145
+ static void ToHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
146
+ D_ASSERT(args.ColumnCount() == 1);
147
+ auto &input = args.data[0];
148
+ idx_t count = args.size();
149
+
150
+ switch (input.GetType().InternalType()) {
151
+ case PhysicalType::VARCHAR:
152
+ UnaryExecutor::ExecuteString<string_t, string_t, HexStrOperator>(input, result, count);
153
+ break;
154
+ case PhysicalType::INT64:
155
+ UnaryExecutor::ExecuteString<int64_t, string_t, HexIntegralOperator>(input, result, count);
156
+ break;
157
+ case PhysicalType::INT128:
158
+ UnaryExecutor::ExecuteString<hugeint_t, string_t, HexHugeIntOperator>(input, result, count);
159
+ break;
160
+ case PhysicalType::UINT64:
161
+ UnaryExecutor::ExecuteString<uint64_t, string_t, HexIntegralOperator>(input, result, count);
162
+ break;
163
+ default:
164
+ throw NotImplementedException("Specifier type not implemented");
165
+ }
166
+ }
167
+
168
+ static void FromHexFunction(DataChunk &args, ExpressionState &state, Vector &result) {
169
+ D_ASSERT(args.ColumnCount() == 1);
170
+ D_ASSERT(args.data[0].GetType().InternalType() == PhysicalType::VARCHAR);
171
+ auto &input = args.data[0];
172
+ idx_t count = args.size();
173
+
174
+ UnaryExecutor::ExecuteString<string_t, string_t, FromHexOperator>(input, result, count);
175
+ }
176
+
177
+ void HexFun::RegisterFunction(BuiltinFunctions &set) {
178
+ ScalarFunctionSet to_hex("to_hex");
179
+ ScalarFunctionSet from_hex("from_hex");
180
+
181
+ to_hex.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToHexFunction));
182
+
183
+ to_hex.AddFunction(ScalarFunction({LogicalType::BIGINT}, LogicalType::VARCHAR, ToHexFunction));
184
+
185
+ to_hex.AddFunction(ScalarFunction({LogicalType::UBIGINT}, LogicalType::VARCHAR, ToHexFunction));
186
+
187
+ to_hex.AddFunction(ScalarFunction({LogicalType::HUGEINT}, LogicalType::VARCHAR, ToHexFunction));
188
+
189
+ from_hex.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::BLOB, FromHexFunction));
190
+
191
+ set.AddFunction(to_hex);
192
+ set.AddFunction(from_hex);
193
+
194
+ // mysql
195
+ to_hex.name = "hex";
196
+ from_hex.name = "unhex";
197
+ set.AddFunction(to_hex);
198
+ set.AddFunction(from_hex);
199
+ }
200
+
201
+ } // namespace duckdb
@@ -45,6 +45,7 @@ void BuiltinFunctions::RegisterStringFunctions() {
45
45
  // blob functions
46
46
  Register<Base64Fun>();
47
47
  Register<EncodeFun>();
48
+ Register<HexFun>();
48
49
 
49
50
  // bit functions
50
51
  Register<GetBitFun>();
@@ -53,6 +53,25 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
53
53
  }
54
54
  }
55
55
 
56
+ uint8_t GetCandidateSpecificity(const LogicalType &candidate_type) {
57
+ //! Const ht with accepted auto_types and their weights in specificity
58
+ const duckdb::unordered_map<uint8_t, uint8_t> auto_type_candidates_specificity {
59
+ {(uint8_t)LogicalTypeId::VARCHAR, 0}, {(uint8_t)LogicalTypeId::TIMESTAMP, 1},
60
+ {(uint8_t)LogicalTypeId::DATE, 2}, {(uint8_t)LogicalTypeId::TIME, 3},
61
+ {(uint8_t)LogicalTypeId::DOUBLE, 4}, {(uint8_t)LogicalTypeId::FLOAT, 5},
62
+ {(uint8_t)LogicalTypeId::BIGINT, 6}, {(uint8_t)LogicalTypeId::INTEGER, 7},
63
+ {(uint8_t)LogicalTypeId::SMALLINT, 8}, {(uint8_t)LogicalTypeId::TINYINT, 9},
64
+ {(uint8_t)LogicalTypeId::BOOLEAN, 10}, {(uint8_t)LogicalTypeId::SQLNULL, 11}};
65
+
66
+ auto id = (uint8_t)candidate_type.id();
67
+ auto it = auto_type_candidates_specificity.find(id);
68
+ if (it == auto_type_candidates_specificity.end()) {
69
+ throw BinderException("Auto Type Candidate of type %s is not accepted as a valid input",
70
+ LogicalTypeIdToString(candidate_type.id()));
71
+ }
72
+ return it->second;
73
+ }
74
+
56
75
  static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctionBindInput &input,
57
76
  vector<LogicalType> &return_types, vector<string> &names) {
58
77
  auto &config = DBConfig::GetConfig(context);
@@ -105,6 +124,32 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
105
124
  if (names.empty()) {
106
125
  throw BinderException("read_csv requires at least a single column as input!");
107
126
  }
127
+ } else if (loption == "auto_type_candidates") {
128
+ options.auto_type_candidates.clear();
129
+ map<uint8_t, LogicalType> candidate_types;
130
+ // We always have the extremes of Null and Varchar, so we can default to varchar if the
131
+ // sniffer is not able to confidently detect that column type
132
+ candidate_types[GetCandidateSpecificity(LogicalType::VARCHAR)] = LogicalType::VARCHAR;
133
+ candidate_types[GetCandidateSpecificity(LogicalType::SQLNULL)] = LogicalType::SQLNULL;
134
+
135
+ auto &child_type = kv.second.type();
136
+ if (child_type.id() != LogicalTypeId::LIST) {
137
+ throw BinderException("read_csv auto_types requires a list as input");
138
+ }
139
+ auto &list_children = ListValue::GetChildren(kv.second);
140
+ if (list_children.empty()) {
141
+ throw BinderException("auto_type_candidates requires at least one type");
142
+ }
143
+ for (auto &child : list_children) {
144
+ if (child.type().id() != LogicalTypeId::VARCHAR) {
145
+ throw BinderException("auto_type_candidates requires a type specification as string");
146
+ }
147
+ auto candidate_type = TransformStringToLogicalType(StringValue::Get(child), context);
148
+ candidate_types[GetCandidateSpecificity(candidate_type)] = candidate_type;
149
+ }
150
+ for (auto &candidate_type : candidate_types) {
151
+ options.auto_type_candidates.emplace_back(candidate_type.second);
152
+ }
108
153
  } else if (loption == "column_names" || loption == "names") {
109
154
  if (!options.name_list.empty()) {
110
155
  throw BinderException("read_csv_auto column_names/names can only be supplied once");
@@ -795,6 +840,7 @@ static void ReadCSVAddNamedParameters(TableFunction &table_function) {
795
840
  table_function.named_parameters["escape"] = LogicalType::VARCHAR;
796
841
  table_function.named_parameters["nullstr"] = LogicalType::VARCHAR;
797
842
  table_function.named_parameters["columns"] = LogicalType::ANY;
843
+ table_function.named_parameters["auto_type_candidates"] = LogicalType::ANY;
798
844
  table_function.named_parameters["header"] = LogicalType::BOOLEAN;
799
845
  table_function.named_parameters["auto_detect"] = LogicalType::BOOLEAN;
800
846
  table_function.named_parameters["sample_size"] = LogicalType::BIGINT;
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev904"
2
+ #define DUCKDB_VERSION "0.7.2-dev982"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "56a94e3a49"
5
+ #define DUCKDB_SOURCE_ID "d43e34e8ba"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -16,10 +16,11 @@ namespace duckdb {
16
16
  // Join Reference Types
17
17
  //===--------------------------------------------------------------------===//
18
18
  enum class JoinRefType : uint8_t {
19
- REGULAR, // Explicit conditions
20
- NATURAL, // Implied conditions
21
- CROSS, // No condition
22
- POSITIONAL // Positional condition
19
+ REGULAR, // Explicit conditions
20
+ NATURAL, // Implied conditions
21
+ CROSS, // No condition
22
+ POSITIONAL, // Positional condition
23
+ ASOF // AsOf conditions
23
24
  };
24
25
 
25
26
  const char *ToString(JoinRefType value);
@@ -49,6 +49,7 @@ enum class LogicalOperatorType : uint8_t {
49
49
  LOGICAL_ANY_JOIN = 53,
50
50
  LOGICAL_CROSS_PRODUCT = 54,
51
51
  LOGICAL_POSITIONAL_JOIN = 55,
52
+ LOGICAL_ASOF_JOIN = 56,
52
53
  // -----------------------------
53
54
  // SetOps
54
55
  // -----------------------------
@@ -21,6 +21,19 @@ namespace duckdb {
21
21
  */
22
22
  class StringUtil {
23
23
  public:
24
+ static uint8_t GetHexValue(char c) {
25
+ if (c >= '0' && c <= '9') {
26
+ return c - '0';
27
+ }
28
+ if (c >= 'a' && c <= 'f') {
29
+ return c - 'a' + 10;
30
+ }
31
+ if (c >= 'A' && c <= 'F') {
32
+ return c - 'A' + 10;
33
+ }
34
+ throw InvalidInputException("Invalid input for hex digit: %s", string(c, 1));
35
+ }
36
+
24
37
  DUCKDB_API static bool CharacterIsSpace(char c) {
25
38
  return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
26
39
  }
@@ -67,7 +67,7 @@ public:
67
67
  inline LogicalType &GetTypeMutable() {
68
68
  return type_;
69
69
  }
70
- inline const LogicalType &type() const {
70
+ inline const LogicalType &type() const { // NOLINT
71
71
  return type_;
72
72
  }
73
73
  inline bool IsNull() const {
@@ -78,6 +78,10 @@ public:
78
78
  DUCKDB_API static Value MinimumValue(const LogicalType &type);
79
79
  //! Create the highest possible value of a given type (numeric only)
80
80
  DUCKDB_API static Value MaximumValue(const LogicalType &type);
81
+ //! Create the negative infinite value of a given type (numeric only)
82
+ DUCKDB_API static Value NegativeInfinity(const LogicalType &type);
83
+ //! Create the positive infinite value of a given type (numeric only)
84
+ DUCKDB_API static Value Infinity(const LogicalType &type);
81
85
  //! Create a Numeric value of the specified type with the specified value
82
86
  DUCKDB_API static Value Numeric(const LogicalType &type, int64_t value);
83
87
  DUCKDB_API static Value Numeric(const LogicalType &type, hugeint_t value);
@@ -161,7 +165,7 @@ public:
161
165
 
162
166
  //! Create a blob Value from a data pointer and a length: no bytes are interpreted
163
167
  DUCKDB_API static Value BLOB(const_data_ptr_t data, idx_t len);
164
- DUCKDB_API static Value BLOB_RAW(const string &data) {
168
+ DUCKDB_API static Value BLOB_RAW(const string &data) { // NOLINT
165
169
  return Value::BLOB((const_data_ptr_t)data.c_str(), data.size());
166
170
  }
167
171
  //! Creates a blob by casting a specified string to a blob (i.e. interpreting \x characters)
@@ -280,7 +284,7 @@ public:
280
284
 
281
285
  private:
282
286
  //! The logical of the value
283
- LogicalType type_;
287
+ LogicalType type_; // NOLINT
284
288
 
285
289
  //! Whether or not the value is NULL
286
290
  bool is_null;
@@ -297,17 +301,17 @@ private:
297
301
  uint32_t uinteger;
298
302
  uint64_t ubigint;
299
303
  hugeint_t hugeint;
300
- float float_;
301
- double double_;
304
+ float float_; // NOLINT
305
+ double double_; // NOLINT
302
306
  uintptr_t pointer;
303
307
  uint64_t hash;
304
308
  date_t date;
305
309
  dtime_t time;
306
310
  timestamp_t timestamp;
307
311
  interval_t interval;
308
- } value_;
312
+ } value_; // NOLINT
309
313
 
310
- shared_ptr<ExtraValueInfo> value_info_;
314
+ shared_ptr<ExtraValueInfo> value_info_; // NOLINT
311
315
 
312
316
  private:
313
317
  template <class T>
@@ -209,8 +209,8 @@ public:
209
209
 
210
210
  template <class INPUT_TYPE, class RESULT_TYPE, class OP>
211
211
  static void ExecuteString(Vector &input, Vector &result, idx_t count) {
212
- UnaryExecutor::GenericExecute<string_t, string_t, UnaryStringOperator<OP>>(input, result, count,
213
- (void *)&result);
212
+ UnaryExecutor::GenericExecute<INPUT_TYPE, RESULT_TYPE, UnaryStringOperator<OP>>(input, result, count,
213
+ (void *)&result);
214
214
  }
215
215
  };
216
216
 
@@ -60,10 +60,12 @@ public:
60
60
 
61
61
  OperatorResultType Execute(DataChunk &input, DataChunk &output);
62
62
 
63
+ // returns if the left side is scanned as a constant vector
63
64
  bool ScanLHS() {
64
65
  return scan_input_chunk;
65
66
  }
66
67
 
68
+ // returns the position within the chunk that is scanned as a constant input vector
67
69
  idx_t PositionInChunk() {
68
70
  return position_in_chunk;
69
71
  }
@@ -14,6 +14,7 @@
14
14
  #include "duckdb/common/types/value.hpp"
15
15
  #include "duckdb/common/field_writer.hpp"
16
16
  #include "duckdb/common/case_insensitive_map.hpp"
17
+ #include "duckdb/common/types.hpp"
17
18
 
18
19
  namespace duckdb {
19
20
 
@@ -77,6 +78,11 @@ struct BufferedCSVReaderOptions {
77
78
  vector<LogicalType> sql_type_list;
78
79
  //! User-defined name list
79
80
  vector<string> name_list;
81
+ //! Types considered as candidates for auto detection ordered by descending specificity (~ from high to low)
82
+ vector<LogicalType> auto_type_candidates = {LogicalType::VARCHAR, LogicalType::TIMESTAMP, LogicalType::DATE,
83
+ LogicalType::TIME, LogicalType::DOUBLE, LogicalType::BIGINT,
84
+ LogicalType::BOOLEAN, LogicalType::SQLNULL};
85
+
80
86
  //===--------------------------------------------------------------------===//
81
87
  // ReadCSVOptions
82
88
  //===--------------------------------------------------------------------===//
@@ -30,6 +30,11 @@ public:
30
30
  }
31
31
 
32
32
  string ParamsToString() const override;
33
+
34
+ static unique_ptr<PhysicalOperator>
35
+ CreateJoinProjection(vector<LogicalType> proj_types, const vector<LogicalType> &lhs_types,
36
+ const vector<LogicalType> &rhs_types, const vector<idx_t> &left_projection_map,
37
+ const vector<idx_t> &right_projection_map, const idx_t estimated_cardinality);
33
38
  };
34
39
 
35
40
  } // namespace duckdb
@@ -48,6 +48,7 @@ protected:
48
48
 
49
49
  unique_ptr<PhysicalOperator> CreatePlan(LogicalAggregate &op);
50
50
  unique_ptr<PhysicalOperator> CreatePlan(LogicalAnyJoin &op);
51
+ unique_ptr<PhysicalOperator> CreatePlan(LogicalAsOfJoin &op);
51
52
  unique_ptr<PhysicalOperator> CreatePlan(LogicalColumnDataGet &op);
52
53
  unique_ptr<PhysicalOperator> CreatePlan(LogicalComparisonJoin &op);
53
54
  unique_ptr<PhysicalOperator> CreatePlan(LogicalCreate &op);
@@ -122,4 +122,12 @@ struct IsFiniteFun {
122
122
  static void RegisterFunction(BuiltinFunctions &set);
123
123
  };
124
124
 
125
+ struct GreatestCommonDivisorFun {
126
+ static void RegisterFunction(BuiltinFunctions &set);
127
+ };
128
+
129
+ struct LeastCommonMultipleFun {
130
+ static void RegisterFunction(BuiltinFunctions &set);
131
+ };
132
+
125
133
  } // namespace duckdb
@@ -213,4 +213,8 @@ struct JaroWinklerFun {
213
213
  static void RegisterFunction(BuiltinFunctions &set);
214
214
  };
215
215
 
216
+ struct HexFun {
217
+ static void RegisterFunction(BuiltinFunctions &set);
218
+ };
219
+
216
220
  } // namespace duckdb
@@ -14,6 +14,7 @@ class LogicalOperator;
14
14
 
15
15
  class LogicalAggregate;
16
16
  class LogicalAnyJoin;
17
+ class LogicalAsOfJoin;
17
18
  class LogicalColumnDataGet;
18
19
  class LogicalComparisonJoin;
19
20
  class LogicalCopyToFile;
@@ -1,5 +1,6 @@
1
1
  #include "duckdb/planner/operator/logical_aggregate.hpp"
2
2
  #include "duckdb/planner/operator/logical_any_join.hpp"
3
+ #include "duckdb/planner/operator/logical_asof_join.hpp"
3
4
  #include "duckdb/planner/operator/logical_column_data_get.hpp"
4
5
  #include "duckdb/planner/operator/logical_comparison_join.hpp"
5
6
  #include "duckdb/planner/operator/logical_copy_to_file.hpp"
@@ -0,0 +1,22 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/planner/operator/logical_asof_join.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/planner/operator/logical_comparison_join.hpp"
12
+
13
+ namespace duckdb {
14
+
15
+ //! LogicalAsOfJoin represents a temporal-style join with one less-than inequality.
16
+ //! This inequality matches the greatest value on the right that satisfies the condition.
17
+ class LogicalAsOfJoin : public LogicalComparisonJoin {
18
+ public:
19
+ explicit LogicalAsOfJoin(JoinType type);
20
+ };
21
+
22
+ } // namespace duckdb
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/constants.hpp"
12
+ #include "duckdb/common/enums/joinref_type.hpp"
12
13
  #include "duckdb/common/unordered_set.hpp"
13
14
  #include "duckdb/planner/joinside.hpp"
14
15
  #include "duckdb/planner/operator/logical_join.hpp"
@@ -34,10 +35,12 @@ public:
34
35
  FieldReader &reader);
35
36
 
36
37
  public:
37
- static unique_ptr<LogicalOperator> CreateJoin(JoinType type, unique_ptr<LogicalOperator> left_child,
38
+ static unique_ptr<LogicalOperator> CreateJoin(JoinType type, JoinRefType ref_type,
39
+ unique_ptr<LogicalOperator> left_child,
38
40
  unique_ptr<LogicalOperator> right_child,
39
41
  unique_ptr<Expression> condition);
40
- static unique_ptr<LogicalOperator> CreateJoin(JoinType type, unique_ptr<LogicalOperator> left_child,
42
+ static unique_ptr<LogicalOperator> CreateJoin(JoinType type, JoinRefType ref_type,
43
+ unique_ptr<LogicalOperator> left_child,
41
44
  unique_ptr<LogicalOperator> right_child,
42
45
  vector<JoinCondition> conditions,
43
46
  vector<unique_ptr<Expression>> arbitrary_expressions);
@@ -585,7 +585,7 @@ DUCKDB_API bool duckdb_result_is_streaming(duckdb_result result);
585
585
  Returns the number of data chunks present in the result.
586
586
 
587
587
  * result: The result object
588
- * returns: The resulting data chunk. Returns `NULL` if the chunk index is out of bounds.
588
+ * returns: Number of data chunks present in the result.
589
589
  */
590
590
  DUCKDB_API idx_t duckdb_result_chunk_count(duckdb_result result);
591
591
 
@@ -57,6 +57,7 @@ void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) {
57
57
  analyzer.VisitOperator(*op.children[0]);
58
58
  return;
59
59
  }
60
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
60
61
  case LogicalOperatorType::LOGICAL_DELIM_JOIN:
61
62
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
62
63
  if (everything_referenced) {
@@ -14,6 +14,7 @@ unique_ptr<LogicalOperator> FilterPullup::Rewrite(unique_ptr<LogicalOperator> op
14
14
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
15
15
  case LogicalOperatorType::LOGICAL_ANY_JOIN:
16
16
  case LogicalOperatorType::LOGICAL_DELIM_JOIN:
17
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
17
18
  return PullupJoin(std::move(op));
18
19
  case LogicalOperatorType::LOGICAL_INTERSECT:
19
20
  case LogicalOperatorType::LOGICAL_EXCEPT:
@@ -31,7 +32,8 @@ unique_ptr<LogicalOperator> FilterPullup::Rewrite(unique_ptr<LogicalOperator> op
31
32
 
32
33
  unique_ptr<LogicalOperator> FilterPullup::PullupJoin(unique_ptr<LogicalOperator> op) {
33
34
  D_ASSERT(op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
34
- op->type == LogicalOperatorType::LOGICAL_ANY_JOIN || op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN);
35
+ op->type == LogicalOperatorType::LOGICAL_ASOF_JOIN || op->type == LogicalOperatorType::LOGICAL_ANY_JOIN ||
36
+ op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN);
35
37
  auto &join = (LogicalJoin &)*op;
36
38
 
37
39
  switch (join.join_type) {
@@ -23,6 +23,7 @@ unique_ptr<LogicalOperator> FilterPushdown::Rewrite(unique_ptr<LogicalOperator>
23
23
  return PushdownCrossProduct(std::move(op));
24
24
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
25
25
  case LogicalOperatorType::LOGICAL_ANY_JOIN:
26
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
26
27
  case LogicalOperatorType::LOGICAL_DELIM_JOIN:
27
28
  return PushdownJoin(std::move(op));
28
29
  case LogicalOperatorType::LOGICAL_PROJECTION:
@@ -48,7 +49,8 @@ unique_ptr<LogicalOperator> FilterPushdown::Rewrite(unique_ptr<LogicalOperator>
48
49
 
49
50
  unique_ptr<LogicalOperator> FilterPushdown::PushdownJoin(unique_ptr<LogicalOperator> op) {
50
51
  D_ASSERT(op->type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN ||
51
- op->type == LogicalOperatorType::LOGICAL_ANY_JOIN || op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN);
52
+ op->type == LogicalOperatorType::LOGICAL_ASOF_JOIN || op->type == LogicalOperatorType::LOGICAL_ANY_JOIN ||
53
+ op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN);
52
54
  auto &join = (LogicalJoin &)*op;
53
55
  unordered_set<idx_t> left_bindings, right_bindings;
54
56
  LogicalJoin::GetTableReferences(*op->children[0], left_bindings);
@@ -309,6 +309,7 @@ static LogicalGet *GetLogicalGet(LogicalOperator *op, idx_t table_index = DConst
309
309
  case LogicalOperatorType::LOGICAL_PROJECTION:
310
310
  get = GetLogicalGet(op->children.at(0).get(), table_index);
311
311
  break;
312
+ case LogicalOperatorType::LOGICAL_ASOF_JOIN:
312
313
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
313
314
  LogicalComparisonJoin *join = (LogicalComparisonJoin *)op;
314
315
  // We should never be calling GetLogicalGet without a valid table_index.
@@ -383,6 +384,9 @@ void CardinalityEstimator::InitCardinalityEstimatorProps(vector<NodeOp> *node_op
383
384
  // less than the base table cardinality.
384
385
  join_node->SetCost(join_node->GetBaseTableCardinality());
385
386
  }
387
+ } else if (op->type == LogicalOperatorType::LOGICAL_ASOF_JOIN) {
388
+ // AsOf joins have the cardinality of the LHS
389
+ join_node->SetCost(join_node->GetBaseTableCardinality());
386
390
  }
387
391
  // Total domains can be affected by filters. So we update base table cardinality first
388
392
  EstimateBaseTableCardinality(join_node, op);