duckdb 0.8.2-dev5002.0 → 0.8.2-dev5120.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/icu/icu-strptime.cpp +1 -0
  3. package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
  4. package/src/duckdb/src/common/enum_util.cpp +5 -0
  5. package/src/duckdb/src/common/types/data_chunk.cpp +1 -1
  6. package/src/duckdb/src/common/types.cpp +9 -0
  7. package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +75 -27
  8. package/src/duckdb/src/core_functions/function_list.cpp +1 -1
  9. package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +2 -2
  10. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +13 -7
  11. package/src/duckdb/src/core_functions/scalar/struct/struct_pack.cpp +24 -14
  12. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -2
  13. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +14 -2
  14. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +1 -1
  15. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +13 -8
  16. package/src/duckdb/src/function/cast/struct_cast.cpp +8 -0
  17. package/src/duckdb/src/function/cast/union_casts.cpp +5 -0
  18. package/src/duckdb/src/function/function_binder.cpp +11 -2
  19. package/src/duckdb/src/function/pragma/pragma_functions.cpp +5 -0
  20. package/src/duckdb/src/function/scalar/strftime_format.cpp +29 -8
  21. package/src/duckdb/src/function/table/arrow.cpp +4 -0
  22. package/src/duckdb/src/function/table/copy_csv.cpp +2 -1
  23. package/src/duckdb/src/function/table/read_csv.cpp +4 -1
  24. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  25. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
  26. package/src/duckdb/src/include/duckdb/core_functions/scalar/struct_functions.hpp +5 -2
  27. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +2 -1
  28. package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
  29. package/src/duckdb/src/include/duckdb/main/config.hpp +3 -0
  30. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +1 -0
  31. package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +28 -1
  32. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +0 -18
  33. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +1 -1
  34. package/src/duckdb/src/include/duckdb/planner/extension_callback.hpp +26 -0
  35. package/src/duckdb/src/include/duckdb/planner/planner.hpp +1 -1
  36. package/src/duckdb/src/main/capi/prepared-c.cpp +9 -3
  37. package/src/duckdb/src/main/database.cpp +6 -0
  38. package/src/duckdb/src/main/settings/settings.cpp +4 -0
  39. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +1 -0
  40. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +9 -19
  41. package/src/duckdb/src/planner/bound_parameter_map.cpp +67 -0
  42. package/src/duckdb/src/planner/planner.cpp +2 -2
  43. package/src/duckdb/src/storage/single_file_block_manager.cpp +11 -0
  44. package/src/duckdb/src/storage/table/struct_column_data.cpp +3 -0
  45. package/src/duckdb/ub_src_planner.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev5002.0",
5
+ "version": "0.8.2-dev5120.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -75,6 +75,7 @@ struct ICUStrptime : public ICUDateFunc {
75
75
  calendar->set(UCAL_MINUTE, parsed.data[4]);
76
76
  calendar->set(UCAL_SECOND, parsed.data[5]);
77
77
  calendar->set(UCAL_MILLISECOND, parsed.data[6] / Interval::MICROS_PER_MSEC);
78
+ micros = parsed.data[6] % Interval::MICROS_PER_MSEC;
78
79
 
79
80
  // This overrides the TZ setting, so only use it if an offset was parsed.
80
81
  // Note that we don't bother/worry about the DST setting because the two just combine.
@@ -101,7 +101,7 @@ static BoundStatement CopyToJSONPlan(Binder &binder, CopyStatement &stmt) {
101
101
  info.options["quote"] = {""};
102
102
  info.options["escape"] = {""};
103
103
  info.options["delimiter"] = {"\n"};
104
- info.options["header"] = {0};
104
+ info.options["header"] = {{0}};
105
105
 
106
106
  return binder.Bind(*stmt_copy);
107
107
  }
@@ -5503,6 +5503,8 @@ const char* EnumUtil::ToChars<StrTimeSpecifier>(StrTimeSpecifier value) {
5503
5503
  return "LOCALE_APPROPRIATE_DATE";
5504
5504
  case StrTimeSpecifier::LOCALE_APPROPRIATE_TIME:
5505
5505
  return "LOCALE_APPROPRIATE_TIME";
5506
+ case StrTimeSpecifier::NANOSECOND_PADDED:
5507
+ return "NANOSECOND_PADDED";
5506
5508
  default:
5507
5509
  throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
5508
5510
  }
@@ -5606,6 +5608,9 @@ StrTimeSpecifier EnumUtil::FromString<StrTimeSpecifier>(const char *value) {
5606
5608
  if (StringUtil::Equals(value, "LOCALE_APPROPRIATE_TIME")) {
5607
5609
  return StrTimeSpecifier::LOCALE_APPROPRIATE_TIME;
5608
5610
  }
5611
+ if (StringUtil::Equals(value, "NANOSECOND_PADDED")) {
5612
+ return StrTimeSpecifier::NANOSECOND_PADDED;
5613
+ }
5609
5614
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
5610
5615
  }
5611
5616
 
@@ -263,7 +263,6 @@ void DataChunk::Deserialize(Deserializer &deserializer) {
263
263
 
264
264
  // read and set the row count
265
265
  auto row_count = deserializer.ReadProperty<sel_t>(100, "rows");
266
- SetCardinality(row_count);
267
266
 
268
267
  // read the types
269
268
  vector<LogicalType> types;
@@ -275,6 +274,7 @@ void DataChunk::Deserialize(Deserializer &deserializer) {
275
274
  // initialize the data chunk
276
275
  D_ASSERT(!types.empty());
277
276
  Initialize(Allocator::DefaultAllocator(), types);
277
+ SetCardinality(row_count);
278
278
 
279
279
  // read the data
280
280
  deserializer.ReadList(102, "columns", [&](Deserializer::List &list, idx_t i) {
@@ -659,6 +659,10 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
659
659
  return right;
660
660
  } else if (right.id() == LogicalTypeId::UNKNOWN) {
661
661
  return left;
662
+ } else if ((right.id() == LogicalTypeId::ENUM || left.id() == LogicalTypeId::ENUM) && right.id() != left.id()) {
663
+ // if one is an enum and the other is not, compare strings, not enums
664
+ // see https://github.com/duckdb/duckdb/issues/8561
665
+ return LogicalTypeId::VARCHAR;
662
666
  } else if (left.id() < right.id()) {
663
667
  return right;
664
668
  }
@@ -911,6 +915,11 @@ const string &StructType::GetChildName(const LogicalType &type, idx_t index) {
911
915
  idx_t StructType::GetChildCount(const LogicalType &type) {
912
916
  return StructType::GetChildTypes(type).size();
913
917
  }
918
+ bool StructType::IsUnnamed(const LogicalType &type) {
919
+ auto &child_types = StructType::GetChildTypes(type);
920
+ D_ASSERT(child_types.size() > 0);
921
+ return child_types[0].first.empty();
922
+ }
914
923
 
915
924
  LogicalType LogicalType::STRUCT(child_list_t<LogicalType> children) {
916
925
  auto info = make_shared<StructTypeInfo>(std::move(children));
@@ -288,12 +288,40 @@ struct QuantileCompare {
288
288
  }
289
289
  };
290
290
 
291
+ // Avoid using naked Values in inner loops...
292
+ struct QuantileValue {
293
+ explicit QuantileValue(const Value &v) : val(v), dbl(v.GetValue<double>()) {
294
+ const auto &type = val.type();
295
+ switch (type.id()) {
296
+ case LogicalTypeId::DECIMAL: {
297
+ integral = IntegralValue::Get(v);
298
+ scaling = Hugeint::POWERS_OF_TEN[DecimalType::GetScale(type)];
299
+ break;
300
+ }
301
+ default:
302
+ break;
303
+ }
304
+ }
305
+
306
+ Value val;
307
+
308
+ // DOUBLE
309
+ double dbl;
310
+
311
+ // DECIMAL
312
+ hugeint_t integral;
313
+ hugeint_t scaling;
314
+ };
315
+
316
+ bool operator==(const QuantileValue &x, const QuantileValue &y) {
317
+ return x.val == y.val;
318
+ }
319
+
291
320
  // Continuous interpolation
292
321
  template <bool DISCRETE>
293
322
  struct Interpolator {
294
- Interpolator(const Value &q, const idx_t n_p, const bool desc_p)
295
- : desc(desc_p), RN((double)(n_p - 1) * q.GetValue<double>()), FRN(floor(RN)), CRN(ceil(RN)), begin(0),
296
- end(n_p) {
323
+ Interpolator(const QuantileValue &q, const idx_t n_p, const bool desc_p)
324
+ : desc(desc_p), RN((double)(n_p - 1) * q.dbl), FRN(floor(RN)), CRN(ceil(RN)), begin(0), end(n_p) {
297
325
  }
298
326
 
299
327
  template <class INPUT_TYPE, class TARGET_TYPE, typename ACCESSOR = QuantileDirect<INPUT_TYPE>>
@@ -336,21 +364,20 @@ struct Interpolator {
336
364
  // Discrete "interpolation"
337
365
  template <>
338
366
  struct Interpolator<true> {
339
- static inline idx_t Index(const Value &q, const idx_t n) {
367
+ static inline idx_t Index(const QuantileValue &q, const idx_t n) {
340
368
  idx_t floored;
341
- const auto &type = q.type();
342
- switch (type.id()) {
369
+ switch (q.val.type().id()) {
343
370
  case LogicalTypeId::DECIMAL: {
344
371
  // Integer arithmetic for accuracy
345
- const auto integral = IntegralValue::Get(q);
346
- const auto scaling = Hugeint::POWERS_OF_TEN[DecimalType::GetScale(type)];
372
+ const auto integral = q.integral;
373
+ const auto scaling = q.scaling;
347
374
  const auto scaled_q = DecimalMultiplyOverflowCheck::Operation<hugeint_t, hugeint_t, hugeint_t>(n, integral);
348
375
  const auto scaled_n = DecimalMultiplyOverflowCheck::Operation<hugeint_t, hugeint_t, hugeint_t>(n, scaling);
349
376
  floored = Cast::Operation<hugeint_t, idx_t>((scaled_n - scaled_q) / scaling);
350
377
  break;
351
378
  }
352
379
  default:
353
- const auto scaled_q = (double)(n * q.GetValue<double>());
380
+ const auto scaled_q = (double)(n * q.dbl);
354
381
  floored = floor(n - scaled_q);
355
382
  break;
356
383
  }
@@ -358,7 +385,7 @@ struct Interpolator<true> {
358
385
  return MaxValue<idx_t>(1, n - floored) - 1;
359
386
  }
360
387
 
361
- Interpolator(const Value &q, const idx_t n_p, bool desc_p)
388
+ Interpolator(const QuantileValue &q, const idx_t n_p, bool desc_p)
362
389
  : desc(desc_p), FRN(Index(q, n_p)), CRN(FRN), begin(0), end(n_p) {
363
390
  }
364
391
 
@@ -420,17 +447,18 @@ struct QuantileBindData : public FunctionData {
420
447
  }
421
448
 
422
449
  explicit QuantileBindData(const Value &quantile_p)
423
- : quantiles(1, QuantileAbs(quantile_p)), order(1, 0), desc(quantile_p < 0) {
450
+ : quantiles(1, QuantileValue(QuantileAbs(quantile_p))), order(1, 0), desc(quantile_p < 0) {
424
451
  }
425
452
 
426
453
  explicit QuantileBindData(const vector<Value> &quantiles_p) {
454
+ vector<Value> normalised;
427
455
  size_t pos = 0;
428
456
  size_t neg = 0;
429
457
  for (idx_t i = 0; i < quantiles_p.size(); ++i) {
430
458
  const auto &q = quantiles_p[i];
431
459
  pos += (q > 0);
432
460
  neg += (q < 0);
433
- quantiles.emplace_back(QuantileAbs(q));
461
+ normalised.emplace_back(QuantileAbs(q));
434
462
  order.push_back(i);
435
463
  }
436
464
  if (pos && neg) {
@@ -438,8 +466,12 @@ struct QuantileBindData : public FunctionData {
438
466
  }
439
467
  desc = (neg > 0);
440
468
 
441
- IndirectLess<Value> lt(quantiles.data());
469
+ IndirectLess<Value> lt(normalised.data());
442
470
  std::sort(order.begin(), order.end(), lt);
471
+
472
+ for (const auto &q : normalised) {
473
+ quantiles.emplace_back(QuantileValue(q));
474
+ }
443
475
  }
444
476
 
445
477
  QuantileBindData(const QuantileBindData &other) : order(other.order), desc(other.desc) {
@@ -460,16 +492,24 @@ struct QuantileBindData : public FunctionData {
460
492
  static void Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
461
493
  const AggregateFunction &function) {
462
494
  auto &bind_data = bind_data_p->Cast<QuantileBindData>();
463
- serializer.WriteProperty(100, "quantiles", bind_data.quantiles);
495
+ vector<Value> raw;
496
+ for (const auto &q : bind_data.quantiles) {
497
+ raw.emplace_back(q.val);
498
+ }
499
+ serializer.WriteProperty(100, "quantiles", raw);
464
500
  serializer.WriteProperty(101, "order", bind_data.order);
465
501
  serializer.WriteProperty(102, "desc", bind_data.desc);
466
502
  }
467
503
 
468
504
  static unique_ptr<FunctionData> Deserialize(Deserializer &deserializer, AggregateFunction &function) {
469
505
  auto result = make_uniq<QuantileBindData>();
470
- deserializer.ReadProperty(100, "quantiles", result->quantiles);
506
+ vector<Value> raw;
507
+ deserializer.ReadProperty(100, "quantiles", raw);
471
508
  deserializer.ReadProperty(101, "order", result->order);
472
509
  deserializer.ReadProperty(102, "desc", result->desc);
510
+ for (const auto &r : raw) {
511
+ result->quantiles.emplace_back(QuantileValue(r));
512
+ }
473
513
  return std::move(result);
474
514
  }
475
515
 
@@ -478,7 +518,7 @@ struct QuantileBindData : public FunctionData {
478
518
  throw NotImplementedException("FIXME: serializing quantiles with decimals is not supported right now");
479
519
  }
480
520
 
481
- vector<Value> quantiles;
521
+ vector<QuantileValue> quantiles;
482
522
  vector<idx_t> order;
483
523
  bool desc;
484
524
  };
@@ -566,7 +606,7 @@ struct QuantileScalarOperation : public QuantileOperation {
566
606
  auto &bind_data = aggr_input_data.bind_data->Cast<QuantileBindData>();
567
607
 
568
608
  // Find the two positions needed
569
- const auto q = bind_data.quantiles[0];
609
+ const auto &q = bind_data.quantiles[0];
570
610
 
571
611
  bool replace = false;
572
612
  if (frame.start == prev.start + 1 && frame.end == prev.end + 1) {
@@ -1041,7 +1081,11 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
1041
1081
  return;
1042
1082
  }
1043
1083
  using SAVE_TYPE = typename STATE::SaveType;
1044
- Interpolator<false> interp(0.5, state.v.size(), false);
1084
+ D_ASSERT(finalize_data.input.bind_data);
1085
+ auto &bind_data = finalize_data.input.bind_data->Cast<QuantileBindData>();
1086
+ D_ASSERT(bind_data.quantiles.size() == 1);
1087
+ const auto &q = bind_data.quantiles[0];
1088
+ Interpolator<false> interp(q, state.v.size(), false);
1045
1089
  const auto med = interp.template Operation<SAVE_TYPE, MEDIAN_TYPE>(state.v.data(), finalize_data.result);
1046
1090
 
1047
1091
  MadAccessor<SAVE_TYPE, T, MEDIAN_TYPE> accessor(med);
@@ -1050,8 +1094,8 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
1050
1094
 
1051
1095
  template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
1052
1096
  static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
1053
- AggregateInputData &, STATE &state, const FrameBounds &frame, const FrameBounds &prev,
1054
- Vector &result, idx_t ridx, idx_t bias) {
1097
+ AggregateInputData &aggr_input_data, STATE &state, const FrameBounds &frame,
1098
+ const FrameBounds &prev, Vector &result, idx_t ridx, idx_t bias) {
1055
1099
  auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
1056
1100
  auto &rmask = FlatVector::Validity(result);
1057
1101
 
@@ -1079,7 +1123,10 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
1079
1123
  std::partition(index2, index2 + state.pos, included);
1080
1124
 
1081
1125
  // Find the two positions needed for the median
1082
- const float q = 0.5;
1126
+ D_ASSERT(aggr_input_data.bind_data);
1127
+ auto &bind_data = aggr_input_data.bind_data->Cast<QuantileBindData>();
1128
+ D_ASSERT(bind_data.quantiles.size() == 1);
1129
+ const auto &q = bind_data.quantiles[0];
1083
1130
 
1084
1131
  bool replace = false;
1085
1132
  if (frame.start == prev.start + 1 && frame.end == prev.end + 1) {
@@ -1124,12 +1171,18 @@ struct MedianAbsoluteDeviationOperation : public QuantileOperation {
1124
1171
  }
1125
1172
  };
1126
1173
 
1174
+ unique_ptr<FunctionData> BindMedian(ClientContext &context, AggregateFunction &function,
1175
+ vector<unique_ptr<Expression>> &arguments) {
1176
+ return make_uniq<QuantileBindData>(Value::DECIMAL(int16_t(5), 2, 1));
1177
+ }
1178
+
1127
1179
  template <typename INPUT_TYPE, typename MEDIAN_TYPE, typename TARGET_TYPE>
1128
1180
  AggregateFunction GetTypedMedianAbsoluteDeviationAggregateFunction(const LogicalType &input_type,
1129
1181
  const LogicalType &target_type) {
1130
1182
  using STATE = QuantileState<INPUT_TYPE>;
1131
1183
  using OP = MedianAbsoluteDeviationOperation<MEDIAN_TYPE>;
1132
1184
  auto fun = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, TARGET_TYPE, OP>(input_type, target_type);
1185
+ fun.bind = BindMedian;
1133
1186
  fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1134
1187
  fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, TARGET_TYPE, OP>;
1135
1188
  return fun;
@@ -1173,11 +1226,6 @@ AggregateFunction GetMedianAbsoluteDeviationAggregateFunction(const LogicalType
1173
1226
  }
1174
1227
  }
1175
1228
 
1176
- unique_ptr<FunctionData> BindMedian(ClientContext &context, AggregateFunction &function,
1177
- vector<unique_ptr<Expression>> &arguments) {
1178
- return make_uniq<QuantileBindData>(Value::DECIMAL(int16_t(5), 2, 1));
1179
- }
1180
-
1181
1229
  unique_ptr<FunctionData> BindMedianDecimal(ClientContext &context, AggregateFunction &function,
1182
1230
  vector<unique_ptr<Expression>> &arguments) {
1183
1231
  auto bind_data = BindMedian(context, function, arguments);
@@ -1195,7 +1243,7 @@ unique_ptr<FunctionData> BindMedianAbsoluteDeviationDecimal(ClientContext &conte
1195
1243
  function = GetMedianAbsoluteDeviationAggregateFunction(arguments[0]->return_type);
1196
1244
  function.name = "mad";
1197
1245
  function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1198
- return nullptr;
1246
+ return BindMedian(context, function, arguments);
1199
1247
  }
1200
1248
 
1201
1249
  static const Value &CheckQuantile(const Value &quantile_val) {
@@ -281,7 +281,7 @@ static StaticFunctionDefinition internal_functions[] = {
281
281
  DUCKDB_SCALAR_FUNCTION(RightFun),
282
282
  DUCKDB_SCALAR_FUNCTION(RightGraphemeFun),
283
283
  DUCKDB_SCALAR_FUNCTION_SET(RoundFun),
284
- DUCKDB_SCALAR_FUNCTION_ALIAS(RowFun),
284
+ DUCKDB_SCALAR_FUNCTION(RowFun),
285
285
  DUCKDB_SCALAR_FUNCTION(RpadFun),
286
286
  DUCKDB_SCALAR_FUNCTION_SET(RtrimFun),
287
287
  DUCKDB_SCALAR_FUNCTION_SET(SecondsFun),
@@ -183,7 +183,7 @@ struct StrpTimeFunction {
183
183
  auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
184
184
  auto &info = func_expr.bind_info->Cast<StrpTimeBindData>();
185
185
 
186
- if (ConstantVector::IsNull(args.data[1])) {
186
+ if (args.data[1].GetVectorType() == VectorType::CONSTANT_VECTOR && ConstantVector::IsNull(args.data[1])) {
187
187
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
188
188
  ConstantVector::SetNull(result, true);
189
189
  return;
@@ -203,7 +203,7 @@ struct StrpTimeFunction {
203
203
  auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
204
204
  auto &info = func_expr.bind_info->Cast<StrpTimeBindData>();
205
205
 
206
- if (ConstantVector::IsNull(args.data[1])) {
206
+ if (args.data[1].GetVectorType() == VectorType::CONSTANT_VECTOR && ConstantVector::IsNull(args.data[1])) {
207
207
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
208
208
  ConstantVector::SetNull(result, true);
209
209
  return;
@@ -237,6 +237,16 @@ static void ExecuteFlatSlice(Vector &result, Vector &list_vector, Vector &begin_
237
237
  auto end_idx = end_data.sel->get_index(i);
238
238
  auto step_idx = step_vector ? step_data.sel->get_index(i) : 0;
239
239
 
240
+ auto list_valid = list_data.validity.RowIsValid(list_idx);
241
+ auto begin_valid = begin_data.validity.RowIsValid(begin_idx);
242
+ auto end_valid = end_data.validity.RowIsValid(end_idx);
243
+ auto step_valid = step_vector && step_data.validity.RowIsValid(step_idx);
244
+
245
+ if (!list_valid || !begin_valid || !end_valid || (step_vector && !step_valid)) {
246
+ result_mask.SetInvalid(i);
247
+ continue;
248
+ }
249
+
240
250
  auto sliced = reinterpret_cast<INPUT_TYPE *>(list_data.data)[list_idx];
241
251
  auto begin = begin_is_empty ? 0 : reinterpret_cast<INDEX_TYPE *>(begin_data.data)[begin_idx];
242
252
  auto end = end_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(sliced)
@@ -248,23 +258,19 @@ static void ExecuteFlatSlice(Vector &result, Vector &list_vector, Vector &begin_
248
258
  begin = end_is_empty ? 0 : begin;
249
259
  end = begin_is_empty ? ValueLength<INPUT_TYPE, INDEX_TYPE>(sliced) : end;
250
260
  }
251
- auto list_valid = list_data.validity.RowIsValid(list_idx);
252
- auto begin_valid = begin_data.validity.RowIsValid(begin_idx);
253
- auto end_valid = end_data.validity.RowIsValid(end_idx);
254
- auto step_valid = step_vector && step_data.validity.RowIsValid(step_idx);
255
261
 
256
262
  bool clamp_result = false;
257
- if (list_valid && begin_valid && end_valid && (step_valid || step == 1)) {
263
+ if (step_valid || step == 1) {
258
264
  clamp_result = ClampSlice(sliced, begin, end);
259
265
  }
260
266
 
261
267
  auto length = 0;
262
- if (step_vector && step_valid && list_valid && begin_valid && end_valid && end - begin > 0) {
268
+ if (end - begin > 0) {
263
269
  length = CalculateSliceLength(begin, end, step, step_valid);
264
270
  }
265
271
  sel_length += length;
266
272
 
267
- if (!list_valid || !begin_valid || !end_valid || (step_vector && !step_valid) || !clamp_result) {
273
+ if (!clamp_result) {
268
274
  result_mask.SetInvalid(i);
269
275
  } else if (!step_vector) {
270
276
  result_data[i] = SliceValue<INPUT_TYPE, INDEX_TYPE>(result, sliced, begin, end);
@@ -30,6 +30,7 @@ static void StructPackFunction(DataChunk &args, ExpressionState &state, Vector &
30
30
  result.Verify(args.size());
31
31
  }
32
32
 
33
+ template <bool IS_STRUCT_PACK>
33
34
  static unique_ptr<FunctionData> StructPackBind(ClientContext &context, ScalarFunction &bound_function,
34
35
  vector<unique_ptr<Expression>> &arguments) {
35
36
  case_insensitive_set_t name_collision_set;
@@ -41,17 +42,18 @@ static unique_ptr<FunctionData> StructPackBind(ClientContext &context, ScalarFun
41
42
  child_list_t<LogicalType> struct_children;
42
43
  for (idx_t i = 0; i < arguments.size(); i++) {
43
44
  auto &child = arguments[i];
44
- if (child->alias.empty() && bound_function.name == "struct_pack") {
45
- throw BinderException("Need named argument for struct pack, e.g. STRUCT_PACK(a := b)");
45
+ string alias;
46
+ if (IS_STRUCT_PACK) {
47
+ if (child->alias.empty()) {
48
+ throw BinderException("Need named argument for struct pack, e.g. STRUCT_PACK(a := b)");
49
+ }
50
+ alias = child->alias;
51
+ if (name_collision_set.find(alias) != name_collision_set.end()) {
52
+ throw BinderException("Duplicate struct entry name \"%s\"", alias);
53
+ }
54
+ name_collision_set.insert(alias);
46
55
  }
47
- if (child->alias.empty() && bound_function.name == "row") {
48
- child->alias = "v" + std::to_string(i + 1);
49
- }
50
- if (name_collision_set.find(child->alias) != name_collision_set.end()) {
51
- throw BinderException("Duplicate struct entry name \"%s\"", child->alias);
52
- }
53
- name_collision_set.insert(child->alias);
54
- struct_children.push_back(make_pair(child->alias, arguments[i]->return_type));
56
+ struct_children.push_back(make_pair(alias, arguments[i]->return_type));
55
57
  }
56
58
 
57
59
  // this is more for completeness reasons
@@ -69,10 +71,10 @@ unique_ptr<BaseStatistics> StructPackStats(ClientContext &context, FunctionStati
69
71
  return struct_stats.ToUnique();
70
72
  }
71
73
 
72
- ScalarFunction StructPackFun::GetFunction() {
73
- // the arguments and return types are actually set in the binder function
74
- ScalarFunction fun("struct_pack", {}, LogicalTypeId::STRUCT, StructPackFunction, StructPackBind, nullptr,
75
- StructPackStats);
74
+ template <bool IS_STRUCT_PACK>
75
+ ScalarFunction GetStructPackFunction() {
76
+ ScalarFunction fun(IS_STRUCT_PACK ? "struct_pack" : "row", {}, LogicalTypeId::STRUCT, StructPackFunction,
77
+ StructPackBind<IS_STRUCT_PACK>, nullptr, StructPackStats);
76
78
  fun.varargs = LogicalType::ANY;
77
79
  fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
78
80
  fun.serialize = VariableReturnBindData::Serialize;
@@ -80,4 +82,12 @@ ScalarFunction StructPackFun::GetFunction() {
80
82
  return fun;
81
83
  }
82
84
 
85
+ ScalarFunction StructPackFun::GetFunction() {
86
+ return GetStructPackFunction<true>();
87
+ }
88
+
89
+ ScalarFunction RowFun::GetFunction() {
90
+ return GetStructPackFunction<false>();
91
+ }
92
+
83
93
  } // namespace duckdb
@@ -335,8 +335,6 @@ normal : {
335
335
  if (c == options.dialect_options.state_machine_options.delimiter) {
336
336
  // delimiter: end the value and add it to the chunk
337
337
  goto add_value;
338
- } else if (c == options.dialect_options.state_machine_options.quote && try_add_line) {
339
- return false;
340
338
  } else if (StringUtil::CharacterIsNewline(c)) {
341
339
  // newline: add row
342
340
  if (column > 0 || try_add_line || parse_chunk.data.size() == 1) {
@@ -141,6 +141,10 @@ struct SniffValue {
141
141
  (current_char == '\r' || current_char == '\n')) ||
142
142
  (machine.dialect_options.new_line == NewLineIdentifier::CARRY_ON && current_char == '\n')) {
143
143
  machine.rows_read++;
144
+ }
145
+
146
+ if ((machine.previous_state == CSVState::RECORD_SEPARATOR && machine.state != CSVState::EMPTY_LINE) ||
147
+ (machine.state != CSVState::RECORD_SEPARATOR && machine.previous_state == CSVState::CARRIAGE_RETURN)) {
144
148
  sniffed_values[machine.cur_rows].position = machine.line_start_pos;
145
149
  sniffed_values[machine.cur_rows].set = true;
146
150
  machine.line_start_pos = current_pos;
@@ -287,11 +291,15 @@ void CSVSniffer::DetectTypes() {
287
291
  candidate->csv_buffer_iterator.Process<SniffValue>(*candidate, tuples);
288
292
  // Potentially Skip empty rows (I find this dirty, but it is what the original code does)
289
293
  idx_t true_start = 0;
294
+ idx_t true_pos = 0;
290
295
  idx_t values_start = 0;
291
296
  while (true_start < tuples.size()) {
292
297
  if (tuples[true_start].values.empty() ||
293
298
  (tuples[true_start].values.size() == 1 && tuples[true_start].values[0].IsNull())) {
294
299
  true_start = tuples[true_start].line_number;
300
+ if (true_start < tuples.size()) {
301
+ true_pos = tuples[true_start].position;
302
+ }
295
303
  values_start++;
296
304
  } else {
297
305
  break;
@@ -301,7 +309,11 @@ void CSVSniffer::DetectTypes() {
301
309
  // Potentially Skip Notes (I also find this dirty, but it is what the original code does)
302
310
  while (true_start < tuples.size()) {
303
311
  if (tuples[true_start].values.size() < max_columns_found && !options.null_padding) {
312
+
304
313
  true_start = tuples[true_start].line_number;
314
+ if (true_start < tuples.size()) {
315
+ true_pos = tuples[true_start].position;
316
+ }
305
317
  values_start++;
306
318
  } else {
307
319
  break;
@@ -317,7 +329,7 @@ void CSVSniffer::DetectTypes() {
317
329
  row_idx = 1;
318
330
  }
319
331
  if (!tuples.empty()) {
320
- best_start_without_header = tuples[0].position;
332
+ best_start_without_header = tuples[0].position - true_pos;
321
333
  }
322
334
 
323
335
  // First line where we start our type detection
@@ -387,7 +399,7 @@ void CSVSniffer::DetectTypes() {
387
399
  best_sql_types_candidates_per_column_idx = info_sql_types_candidates;
388
400
  best_format_candidates = format_candidates;
389
401
  best_header_row = tuples[0].values;
390
- best_start_with_header = tuples[0].position;
402
+ best_start_with_header = tuples[0].position - true_pos;
391
403
  }
392
404
  }
393
405
  // Assert that it's all good at this point.
@@ -59,7 +59,7 @@ static void WriteCopyStatement(FileSystem &fs, stringstream &ss, CopyInfo &info,
59
59
  if (info.format == "csv") {
60
60
  // insert default csv options, if not specified
61
61
  if (info.options.find("header") == info.options.end()) {
62
- info.options["header"].push_back(Value::INTEGER(0));
62
+ info.options["header"].push_back(Value::INTEGER(1));
63
63
  }
64
64
  if (info.options.find("delimiter") == info.options.end() && info.options.find("sep") == info.options.end() &&
65
65
  info.options.find("delim") == info.options.end()) {
@@ -184,19 +184,21 @@ static bool PlanIndexJoin(ClientContext &context, LogicalComparisonJoin &op, uni
184
184
  if (!index) {
185
185
  return false;
186
186
  }
187
- // index joins are not supported if there are pushed down table filters
188
- D_ASSERT(right->type == PhysicalOperatorType::TABLE_SCAN);
189
- auto &tbl_scan = right->Cast<PhysicalTableScan>();
190
- // if (tbl_scan.table_filters && !tbl_scan.table_filters->filters.empty()) {
191
- // return false;
192
- // }
187
+
193
188
  // index joins are disabled if enable_optimizer is false
194
189
  if (!ClientConfig::GetConfig(context).enable_optimizer) {
195
190
  return false;
196
191
  }
192
+
193
+ // index joins are disabled on default
194
+ auto force_index_join = ClientConfig::GetConfig(context).force_index_join;
195
+ if (!ClientConfig::GetConfig(context).enable_index_join && !force_index_join) {
196
+ return false;
197
+ }
198
+
197
199
  // check if the cardinality difference justifies an index join
198
- if (!((ClientConfig::GetConfig(context).force_index_join ||
199
- left->estimated_cardinality < 0.01 * right->estimated_cardinality))) {
200
+ auto index_join_is_applicable = left->estimated_cardinality < 0.01 * right->estimated_cardinality;
201
+ if (!index_join_is_applicable && !force_index_join) {
200
202
  return false;
201
203
  }
202
204
 
@@ -205,6 +207,9 @@ static bool PlanIndexJoin(ClientContext &context, LogicalComparisonJoin &op, uni
205
207
  swap(op.conditions[0].left, op.conditions[0].right);
206
208
  swap(op.left_projection_map, op.right_projection_map);
207
209
  }
210
+ D_ASSERT(right->type == PhysicalOperatorType::TABLE_SCAN);
211
+ auto &tbl_scan = right->Cast<PhysicalTableScan>();
212
+
208
213
  plan = make_uniq<PhysicalIndexJoin>(op, std::move(left), std::move(right), std::move(op.conditions), op.join_type,
209
214
  op.left_projection_map, op.right_projection_map, tbl_scan.column_ids, *index,
210
215
  !swap_condition, op.estimated_cardinality);
@@ -9,10 +9,18 @@ unique_ptr<BoundCastData> StructBoundCastData::BindStructToStructCast(BindCastIn
9
9
  vector<BoundCastInfo> child_cast_info;
10
10
  auto &source_child_types = StructType::GetChildTypes(source);
11
11
  auto &result_child_types = StructType::GetChildTypes(target);
12
+
13
+ auto target_is_unnamed = StructType::IsUnnamed(target);
14
+ auto source_is_unnamed = StructType::IsUnnamed(source);
15
+
12
16
  if (source_child_types.size() != result_child_types.size()) {
13
17
  throw TypeMismatchException(source, target, "Cannot cast STRUCTs of different size");
14
18
  }
15
19
  for (idx_t i = 0; i < source_child_types.size(); i++) {
20
+ if (!target_is_unnamed && !source_is_unnamed &&
21
+ !StringUtil::CIEquals(source_child_types[i].first, result_child_types[i].first)) {
22
+ throw TypeMismatchException(source, target, "Cannot cast STRUCTs with different names");
23
+ }
16
24
  auto child_cast = input.GetCastFunction(source_child_types[i].second, result_child_types[i].second);
17
25
  child_cast_info.push_back(std::move(child_cast));
18
26
  }
@@ -304,6 +304,11 @@ static bool UnionToVarcharCast(Vector &source, Vector &result, idx_t count, Cast
304
304
  // now construct the actual varchar vector
305
305
  varchar_union.Flatten(count);
306
306
  auto &tag_vector = UnionVector::GetTags(source);
307
+ auto tag_vector_type = tag_vector.GetVectorType();
308
+ if (tag_vector_type != VectorType::CONSTANT_VECTOR && tag_vector_type != VectorType::FLAT_VECTOR) {
309
+ tag_vector.Flatten(count);
310
+ }
311
+
307
312
  auto tags = FlatVector::GetData<union_tag_t>(tag_vector);
308
313
 
309
314
  auto &validity = FlatVector::Validity(varchar_union);
@@ -268,8 +268,17 @@ unique_ptr<Expression> FunctionBinder::BindScalarFunction(ScalarFunctionCatalogE
268
268
 
269
269
  if (bound_function.null_handling == FunctionNullHandling::DEFAULT_NULL_HANDLING) {
270
270
  for (auto &child : children) {
271
- if (child->return_type == LogicalTypeId::SQLNULL ||
272
- (child->IsFoldable() && ExpressionExecutor::EvaluateScalar(context, *child).IsNull())) {
271
+ if (child->return_type == LogicalTypeId::SQLNULL) {
272
+ return make_uniq<BoundConstantExpression>(Value(LogicalType::SQLNULL));
273
+ }
274
+ if (!child->IsFoldable()) {
275
+ continue;
276
+ }
277
+ Value result;
278
+ if (!ExpressionExecutor::TryEvaluateScalar(context, *child, result)) {
279
+ continue;
280
+ }
281
+ if (result.IsNull()) {
273
282
  return make_uniq<BoundConstantExpression>(Value(LogicalType::SQLNULL));
274
283
  }
275
284
  }
@@ -80,6 +80,10 @@ static void PragmaEnableForceParallelism(ClientContext &context, const FunctionP
80
80
  ClientConfig::GetConfig(context).verify_parallelism = true;
81
81
  }
82
82
 
83
+ static void PragmaEnableIndexJoin(ClientContext &context, const FunctionParameters &parameters) {
84
+ ClientConfig::GetConfig(context).enable_index_join = true;
85
+ }
86
+
83
87
  static void PragmaEnableForceIndexJoin(ClientContext &context, const FunctionParameters &parameters) {
84
88
  ClientConfig::GetConfig(context).force_index_join = true;
85
89
  }
@@ -140,6 +144,7 @@ void PragmaFunctions::RegisterFunction(BuiltinFunctions &set) {
140
144
  set.AddFunction(PragmaFunction::PragmaStatement("enable_optimizer", PragmaEnableOptimizer));
141
145
  set.AddFunction(PragmaFunction::PragmaStatement("disable_optimizer", PragmaDisableOptimizer));
142
146
 
147
+ set.AddFunction(PragmaFunction::PragmaStatement("enable_index_join", PragmaEnableIndexJoin));
143
148
  set.AddFunction(PragmaFunction::PragmaStatement("force_index_join", PragmaEnableForceIndexJoin));
144
149
  set.AddFunction(PragmaFunction::PragmaStatement("force_checkpoint", PragmaForceCheckpoint));
145
150
 
@@ -26,6 +26,8 @@ idx_t StrfTimepecifierSize(StrTimeSpecifier specifier) {
26
26
  case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
27
27
  case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
28
28
  return 2;
29
+ case StrTimeSpecifier::NANOSECOND_PADDED:
30
+ return 9;
29
31
  case StrTimeSpecifier::MICROSECOND_PADDED:
30
32
  return 6;
31
33
  case StrTimeSpecifier::MILLISECOND_PADDED:
@@ -183,9 +185,15 @@ char *StrfTimeFormat::WritePadded3(char *target, uint32_t value) {
183
185
  }
184
186
  }
185
187
 
186
- // write a value in the range of 0..999999 padded to 6 digits
188
+ // write a value in the range of 0..999999... padded to the given number of digits
187
189
  char *StrfTimeFormat::WritePadded(char *target, uint32_t value, size_t padding) {
188
- D_ASSERT(padding % 2 == 0);
190
+ D_ASSERT(padding > 1);
191
+ if (padding % 2) {
192
+ int decimals = value % 1000;
193
+ WritePadded3(target + padding - 3, decimals);
194
+ value /= 1000;
195
+ padding -= 3;
196
+ }
189
197
  for (size_t i = 0; i < padding / 2; i++) {
190
198
  int decimals = value % 100;
191
199
  WritePadded2(target + padding - 2 * (i + 1), decimals);
@@ -309,11 +317,14 @@ char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t
309
317
  case StrTimeSpecifier::SECOND_PADDED:
310
318
  target = WritePadded2(target, data[5]);
311
319
  break;
320
+ case StrTimeSpecifier::NANOSECOND_PADDED:
321
+ target = WritePadded(target, data[6] * Interval::NANOS_PER_MICRO, 9);
322
+ break;
312
323
  case StrTimeSpecifier::MICROSECOND_PADDED:
313
324
  target = WritePadded(target, data[6], 6);
314
325
  break;
315
326
  case StrTimeSpecifier::MILLISECOND_PADDED:
316
- target = WritePadded3(target, data[6] / 1000);
327
+ target = WritePadded3(target, data[6] / Interval::MICROS_PER_MSEC);
317
328
  break;
318
329
  case StrTimeSpecifier::UTC_OFFSET: {
319
330
  *target++ = (data[7] < 0) ? '-' : '+';
@@ -516,6 +527,9 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
516
527
  case 'S':
517
528
  specifier = StrTimeSpecifier::SECOND_PADDED;
518
529
  break;
530
+ case 'n':
531
+ specifier = StrTimeSpecifier::NANOSECOND_PADDED;
532
+ break;
519
533
  case 'f':
520
534
  specifier = StrTimeSpecifier::MICROSECOND_PADDED;
521
535
  break;
@@ -660,6 +674,8 @@ int StrpTimeFormat::NumericSpecifierWidth(StrTimeSpecifier specifier) {
660
674
  return 4;
661
675
  case StrTimeSpecifier::MICROSECOND_PADDED:
662
676
  return 6;
677
+ case StrTimeSpecifier::NANOSECOND_PADDED:
678
+ return 9;
663
679
  default:
664
680
  return -1;
665
681
  }
@@ -855,15 +871,20 @@ bool StrpTimeFormat::Parse(string_t str, ParseResult &result) const {
855
871
  // seconds
856
872
  result_data[5] = number;
857
873
  break;
874
+ case StrTimeSpecifier::NANOSECOND_PADDED:
875
+ D_ASSERT(number < Interval::NANOS_PER_SEC); // enforced by the length of the number
876
+ // microseconds (rounded)
877
+ result_data[6] = (number + Interval::NANOS_PER_MICRO / 2) / Interval::NANOS_PER_MICRO;
878
+ break;
858
879
  case StrTimeSpecifier::MICROSECOND_PADDED:
859
- D_ASSERT(number < 1000000ULL); // enforced by the length of the number
860
- // milliseconds
880
+ D_ASSERT(number < Interval::MICROS_PER_SEC); // enforced by the length of the number
881
+ // microseconds
861
882
  result_data[6] = number;
862
883
  break;
863
884
  case StrTimeSpecifier::MILLISECOND_PADDED:
864
- D_ASSERT(number < 1000ULL); // enforced by the length of the number
865
- // milliseconds
866
- result_data[6] = number * 1000;
885
+ D_ASSERT(number < Interval::MSECS_PER_SEC); // enforced by the length of the number
886
+ // microseconds
887
+ result_data[6] = number * Interval::MICROS_PER_MSEC;
867
888
  break;
868
889
  case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
869
890
  case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
@@ -237,6 +237,10 @@ void ArrowTableFunction::PopulateArrowTableType(ArrowTableType &arrow_table, Arr
237
237
 
238
238
  unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
239
239
  vector<LogicalType> &return_types, vector<string> &names) {
240
+ if (input.inputs[0].IsNull() || input.inputs[1].IsNull() || input.inputs[2].IsNull()) {
241
+ throw BinderException("arrow_scan: pointers cannot be null");
242
+ }
243
+
240
244
  auto stream_factory_ptr = input.inputs[0].GetPointer();
241
245
  auto stream_factory_produce = (stream_factory_produce_t)input.inputs[1].GetPointer(); // NOLINT
242
246
  auto stream_factory_get_schema = (stream_factory_get_schema_t)input.inputs[2].GetPointer(); // NOLINT
@@ -157,6 +157,7 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, CopyInfo &in
157
157
  }
158
158
 
159
159
  bind_data->FinalizeRead(context);
160
+
160
161
  if (options.auto_detect) {
161
162
  // We must run the sniffer.
162
163
  auto file_handle = BaseCSVReader::OpenCSV(context, options);
@@ -332,7 +333,7 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
332
333
  global_data->WriteData(options.prefix.c_str(), options.prefix.size());
333
334
  }
334
335
 
335
- if (options.dialect_options.header) {
336
+ if (!(options.has_header && !options.dialect_options.header)) {
336
337
  MemoryStream stream;
337
338
  // write the header line to the file
338
339
  for (idx_t i = 0; i < csv_data.options.name_list.size(); i++) {
@@ -178,7 +178,10 @@ public:
178
178
  current_file_path = files_path_p[0];
179
179
  CSVFileHandle *file_handle_ptr;
180
180
 
181
- if (!buffer_manager) {
181
+ if (!buffer_manager || (options.skip_rows_set && options.dialect_options.skip_rows > 0)) {
182
+ // If our buffers are too small, and we skip too many rows there is a chance things will go over-buffer
183
+ // for now don't reuse the buffer manager
184
+ buffer_manager.reset();
182
185
  file_handle = ReadCSV::OpenCSV(current_file_path, options.compression, context);
183
186
  file_handle_ptr = file_handle.get();
184
187
  } else {
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev5002"
2
+ #define DUCKDB_VERSION "0.8.2-dev5120"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "239f51293c"
5
+ #define DUCKDB_SOURCE_ID "fc2e4b26a6"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -408,6 +408,7 @@ struct StructType {
408
408
  DUCKDB_API static const LogicalType &GetChildType(const LogicalType &type, idx_t index);
409
409
  DUCKDB_API static const string &GetChildName(const LogicalType &type, idx_t index);
410
410
  DUCKDB_API static idx_t GetChildCount(const LogicalType &type);
411
+ DUCKDB_API static bool IsUnnamed(const LogicalType &type);
411
412
  };
412
413
 
413
414
  struct MapType {
@@ -34,9 +34,12 @@ struct StructPackFun {
34
34
  };
35
35
 
36
36
  struct RowFun {
37
- using ALIAS = StructPackFun;
38
-
39
37
  static constexpr const char *Name = "row";
38
+ static constexpr const char *Parameters = "any";
39
+ static constexpr const char *Description = "Creates an unnamed STRUCT containing the argument values.";
40
+ static constexpr const char *Example = "row(4, 'hello')";
41
+
42
+ static ScalarFunction GetFunction();
40
43
  };
41
44
 
42
45
  } // namespace duckdb
@@ -52,7 +52,8 @@ enum class StrTimeSpecifier : uint8_t {
52
52
  LOCALE_APPROPRIATE_DATE_AND_TIME =
53
53
  29, // %c - Locale’s appropriate date and time representation. (Mon Sep 30 07:06:05 2013)
54
54
  LOCALE_APPROPRIATE_DATE = 30, // %x - Locale’s appropriate date representation. (09/30/13)
55
- LOCALE_APPROPRIATE_TIME = 31 // %X - Locale’s appropriate time representation. (07:06:05)
55
+ LOCALE_APPROPRIATE_TIME = 31, // %X - Locale’s appropriate time representation. (07:06:05)
56
+ NANOSECOND_PADDED = 32 // %n - Nanosecond as a decimal number, zero-padded on the left. (000000000 - 999999999)
56
57
  };
57
58
 
58
59
  struct StrTimeFormat {
@@ -67,6 +67,8 @@ struct ClientConfig {
67
67
  bool enable_caching_operators = true;
68
68
  //! Force parallelism of small tables, used for testing
69
69
  bool verify_parallelism = false;
70
+ //! Enable the optimizer to consider index joins, which are disabled by default
71
+ bool enable_index_join = false;
70
72
  //! Force index join independent of table cardinality, used for testing
71
73
  bool force_index_join = false;
72
74
  //! Force out-of-core computation for operators that support it, used for testing
@@ -39,6 +39,7 @@ class CompressionFunction;
39
39
  class TableFunctionRef;
40
40
  class OperatorExtension;
41
41
  class StorageExtension;
42
+ class ExtensionCallback;
42
43
 
43
44
  struct CompressionFunctionSet;
44
45
  struct DBConfig;
@@ -212,6 +213,8 @@ public:
212
213
  case_insensitive_map_t<duckdb::unique_ptr<StorageExtension>> storage_extensions;
213
214
  //! A buffer pool can be shared across multiple databases (if desired).
214
215
  shared_ptr<BufferPool> buffer_pool;
216
+ //! Set of callbacks that can be installed by extensions
217
+ vector<unique_ptr<ExtensionCallback>> extension_callbacks;
215
218
 
216
219
  public:
217
220
  DUCKDB_API static DBConfig &GetConfig(ClientContext &context);
@@ -14,6 +14,7 @@
14
14
  #include "duckdb/common/unordered_set.hpp"
15
15
  #include "duckdb/common/winapi.hpp"
16
16
  #include "duckdb/planner/expression/bound_parameter_data.hpp"
17
+ #include "duckdb/planner/bound_parameter_map.hpp"
17
18
 
18
19
  namespace duckdb {
19
20
  class CatalogEntry;
@@ -10,12 +10,39 @@
10
10
 
11
11
  #include "duckdb/common/types.hpp"
12
12
  #include "duckdb/common/unordered_map.hpp"
13
+ #include "duckdb/planner/expression/bound_parameter_data.hpp"
13
14
  #include "duckdb/common/case_insensitive_map.hpp"
14
15
 
15
16
  namespace duckdb {
16
17
 
17
- struct BoundParameterData;
18
+ class ParameterExpression;
19
+ class BoundParameterExpression;
18
20
 
19
21
  using bound_parameter_map_t = case_insensitive_map_t<shared_ptr<BoundParameterData>>;
20
22
 
23
+ struct BoundParameterMap {
24
+ public:
25
+ explicit BoundParameterMap(case_insensitive_map_t<BoundParameterData> &parameter_data);
26
+
27
+ public:
28
+ LogicalType GetReturnType(const string &identifier);
29
+
30
+ bound_parameter_map_t *GetParametersPtr();
31
+
32
+ const bound_parameter_map_t &GetParameters();
33
+
34
+ const case_insensitive_map_t<BoundParameterData> &GetParameterData();
35
+
36
+ unique_ptr<BoundParameterExpression> BindParameterExpression(ParameterExpression &expr);
37
+
38
+ private:
39
+ shared_ptr<BoundParameterData> CreateOrGetData(const string &identifier);
40
+ void CreateNewParameter(const string &id, const shared_ptr<BoundParameterData> &param_data);
41
+
42
+ private:
43
+ bound_parameter_map_t parameters;
44
+ // Pre-provided parameter data if populated
45
+ case_insensitive_map_t<BoundParameterData> &parameter_data;
46
+ };
47
+
21
48
  } // namespace duckdb
@@ -9,7 +9,6 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/types/value.hpp"
12
- #include "duckdb/planner/bound_parameter_map.hpp"
13
12
  #include "duckdb/common/case_insensitive_map.hpp"
14
13
 
15
14
  namespace duckdb {
@@ -40,21 +39,4 @@ public:
40
39
  static shared_ptr<BoundParameterData> Deserialize(Deserializer &deserializer);
41
40
  };
42
41
 
43
- struct BoundParameterMap {
44
- explicit BoundParameterMap(case_insensitive_map_t<BoundParameterData> &parameter_data)
45
- : parameter_data(parameter_data) {
46
- }
47
-
48
- bound_parameter_map_t parameters;
49
- case_insensitive_map_t<BoundParameterData> &parameter_data;
50
-
51
- LogicalType GetReturnType(const string &identifier) {
52
- auto it = parameter_data.find(identifier);
53
- if (it == parameter_data.end()) {
54
- return LogicalTypeId::UNKNOWN;
55
- }
56
- return it->second.return_type;
57
- }
58
- };
59
-
60
42
  } // namespace duckdb
@@ -9,7 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/planner/expression.hpp"
12
- #include "duckdb/planner/expression/bound_parameter_data.hpp"
12
+ #include "duckdb/planner/bound_parameter_map.hpp"
13
13
 
14
14
  namespace duckdb {
15
15
 
@@ -0,0 +1,26 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/planner/extension_callback.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/common.hpp"
12
+
13
+ namespace duckdb {
14
+ class DatabaseInstance;
15
+
16
+ class ExtensionCallback {
17
+ public:
18
+ virtual ~ExtensionCallback() {
19
+ }
20
+
21
+ //! Called after an extension is finished loading
22
+ virtual void OnExtensionLoaded(DatabaseInstance &db, const string &name) {
23
+ }
24
+ };
25
+
26
+ } // namespace duckdb
@@ -11,7 +11,7 @@
11
11
  #include "duckdb/parser/sql_statement.hpp"
12
12
  #include "duckdb/planner/binder.hpp"
13
13
  #include "duckdb/planner/logical_operator.hpp"
14
- #include "duckdb/planner/expression/bound_parameter_data.hpp"
14
+ #include "duckdb/planner/bound_parameter_map.hpp"
15
15
 
16
16
  namespace duckdb {
17
17
  class ClientContext;
@@ -124,10 +124,16 @@ duckdb_type duckdb_param_type(duckdb_prepared_statement prepared_statement, idx_
124
124
  }
125
125
  LogicalType param_type;
126
126
  auto identifier = std::to_string(param_idx);
127
- if (!wrapper->statement->data->TryGetType(identifier, param_type)) {
128
- return DUCKDB_TYPE_INVALID;
127
+ if (wrapper->statement->data->TryGetType(identifier, param_type)) {
128
+ return ConvertCPPTypeToC(param_type);
129
+ }
130
+ // The value_map is gone after executing the prepared statement
131
+ // See if this is the case and we still have a value registered for it
132
+ auto it = wrapper->values.find(identifier);
133
+ if (it != wrapper->values.end()) {
134
+ return ConvertCPPTypeToC(it->second.type());
129
135
  }
130
- return ConvertCPPTypeToC(param_type);
136
+ return DUCKDB_TYPE_INVALID;
131
137
  }
132
138
 
133
139
  duckdb_state duckdb_clear_bindings(duckdb_prepared_statement prepared_statement) {
@@ -19,6 +19,7 @@
19
19
  #include "duckdb/storage/storage_extension.hpp"
20
20
  #include "duckdb/storage/storage_manager.hpp"
21
21
  #include "duckdb/transaction/transaction_manager.hpp"
22
+ #include "duckdb/planner/extension_callback.hpp"
22
23
 
23
24
  #ifndef DUCKDB_NO_THREADS
24
25
  #include "duckdb/common/thread.hpp"
@@ -381,6 +382,11 @@ bool DuckDB::ExtensionIsLoaded(const std::string &name) {
381
382
  void DatabaseInstance::SetExtensionLoaded(const std::string &name) {
382
383
  auto extension_name = ExtensionHelper::GetExtensionName(name);
383
384
  loaded_extensions.insert(extension_name);
385
+
386
+ auto &callbacks = DBConfig::GetConfig(*this).extension_callbacks;
387
+ for (auto &callback : callbacks) {
388
+ callback->OnExtensionLoaded(*this, name);
389
+ }
384
390
  }
385
391
 
386
392
  bool DatabaseInstance::TryGetCurrentSetting(const std::string &key, Value &result) {
@@ -21,6 +21,10 @@ namespace duckdb {
21
21
  // Access Mode
22
22
  //===--------------------------------------------------------------------===//
23
23
  void AccessModeSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
24
+ if (db) {
25
+ throw InvalidInputException("Cannot change access_mode setting while database is running - it must be set when "
26
+ "opening or attaching the database");
27
+ }
24
28
  auto parameter = StringUtil::Lower(input.ToString());
25
29
  if (parameter == "automatic") {
26
30
  config.options.access_mode = AccessMode::AUTOMATIC;
@@ -119,6 +119,7 @@ BindResult ExpressionBinder::BindExpression(ComparisonExpression &expr, idx_t de
119
119
  if (!error.empty()) {
120
120
  return BindResult(error);
121
121
  }
122
+
122
123
  // the children have been successfully resolved
123
124
  auto &left = BoundExpression::GetExpression(*expr.left);
124
125
  auto &right = BoundExpression::GetExpression(*expr.right);
@@ -7,35 +7,25 @@
7
7
  namespace duckdb {
8
8
 
9
9
  BindResult ExpressionBinder::BindExpression(ParameterExpression &expr, idx_t depth) {
10
- auto bound_parameter = make_uniq<BoundParameterExpression>(expr.identifier);
11
- bound_parameter->alias = expr.alias;
12
10
  if (!binder.parameters) {
13
11
  throw BinderException("Unexpected prepared parameter. This type of statement can't be prepared!");
14
12
  }
15
13
  auto parameter_id = expr.identifier;
16
- // check if a parameter value has already been supplied
17
- if (binder.parameters->parameter_data.count(parameter_id)) {
14
+
15
+ D_ASSERT(binder.parameters);
16
+ // Check if a parameter value has already been supplied
17
+ auto &parameter_data = binder.parameters->GetParameterData();
18
+ auto param_data_it = parameter_data.find(parameter_id);
19
+ if (param_data_it != parameter_data.end()) {
18
20
  // it has! emit a constant directly
19
- auto &data = binder.parameters->parameter_data[parameter_id];
21
+ auto &data = param_data_it->second;
20
22
  auto constant = make_uniq<BoundConstantExpression>(data.GetValue());
21
23
  constant->alias = expr.alias;
24
+ constant->return_type = binder.parameters->GetReturnType(parameter_id);
22
25
  return BindResult(std::move(constant));
23
26
  }
24
27
 
25
- auto entry = binder.parameters->parameters.find(parameter_id);
26
- if (entry == binder.parameters->parameters.end()) {
27
- // no entry yet: create a new one
28
- auto data = make_shared<BoundParameterData>();
29
- data->return_type = binder.parameters->GetReturnType(parameter_id);
30
- bound_parameter->return_type = data->return_type;
31
- bound_parameter->parameter_data = data;
32
- binder.parameters->parameters[parameter_id] = std::move(data);
33
- } else {
34
- // a prepared statement with this parameter index was already there: use it
35
- auto &data = entry->second;
36
- bound_parameter->parameter_data = data;
37
- bound_parameter->return_type = binder.parameters->GetReturnType(parameter_id);
38
- }
28
+ auto bound_parameter = binder.parameters->BindParameterExpression(expr);
39
29
  return BindResult(std::move(bound_parameter));
40
30
  }
41
31
 
@@ -0,0 +1,67 @@
1
+ #include "duckdb/planner/bound_parameter_map.hpp"
2
+ #include "duckdb/parser/expression/parameter_expression.hpp"
3
+ #include "duckdb/planner/expression/bound_parameter_expression.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ BoundParameterMap::BoundParameterMap(case_insensitive_map_t<BoundParameterData> &parameter_data)
8
+ : parameter_data(parameter_data) {
9
+ }
10
+
11
+ LogicalType BoundParameterMap::GetReturnType(const string &identifier) {
12
+ D_ASSERT(!identifier.empty());
13
+ auto it = parameter_data.find(identifier);
14
+ if (it == parameter_data.end()) {
15
+ return LogicalTypeId::UNKNOWN;
16
+ }
17
+ return it->second.return_type;
18
+ }
19
+
20
+ bound_parameter_map_t *BoundParameterMap::GetParametersPtr() {
21
+ return &parameters;
22
+ }
23
+
24
+ const bound_parameter_map_t &BoundParameterMap::GetParameters() {
25
+ return parameters;
26
+ }
27
+
28
+ const case_insensitive_map_t<BoundParameterData> &BoundParameterMap::GetParameterData() {
29
+ return parameter_data;
30
+ }
31
+
32
+ shared_ptr<BoundParameterData> BoundParameterMap::CreateOrGetData(const string &identifier) {
33
+ auto entry = parameters.find(identifier);
34
+ if (entry == parameters.end()) {
35
+ // no entry yet: create a new one
36
+ auto data = make_shared<BoundParameterData>();
37
+ data->return_type = GetReturnType(identifier);
38
+
39
+ CreateNewParameter(identifier, data);
40
+ return data;
41
+ }
42
+ return entry->second;
43
+ }
44
+
45
+ unique_ptr<BoundParameterExpression> BoundParameterMap::BindParameterExpression(ParameterExpression &expr) {
46
+ auto &identifier = expr.identifier;
47
+ auto return_type = GetReturnType(identifier);
48
+
49
+ D_ASSERT(!parameter_data.count(identifier));
50
+
51
+ // No value has been supplied yet,
52
+ // We return a shared pointer to an object that will get populated with a Value later
53
+ // When the BoundParameterExpression gets executed, this will be used to get the corresponding value
54
+ auto param_data = CreateOrGetData(identifier);
55
+ auto bound_expr = make_uniq<BoundParameterExpression>(identifier);
56
+ bound_expr->parameter_data = param_data;
57
+ bound_expr->return_type = return_type;
58
+ bound_expr->alias = expr.alias;
59
+ return bound_expr;
60
+ }
61
+
62
+ void BoundParameterMap::CreateNewParameter(const string &id, const shared_ptr<BoundParameterData> &param_data) {
63
+ D_ASSERT(!parameters.count(id));
64
+ parameters.emplace(std::make_pair(id, param_data));
65
+ }
66
+
67
+ } // namespace duckdb
@@ -78,10 +78,10 @@ void Planner::CreatePlan(SQLStatement &statement) {
78
78
  this->properties.parameter_count = parameter_count;
79
79
  properties.bound_all_parameters = parameters_resolved;
80
80
 
81
- Planner::VerifyPlan(context, plan, &bound_parameters.parameters);
81
+ Planner::VerifyPlan(context, plan, bound_parameters.GetParametersPtr());
82
82
 
83
83
  // set up a map of parameter number -> value entries
84
- for (auto &kv : bound_parameters.parameters) {
84
+ for (auto &kv : bound_parameters.GetParameters()) {
85
85
  auto &identifier = kv.first;
86
86
  auto &param = kv.second;
87
87
  // check if the type of the parameter could be resolved
@@ -8,6 +8,7 @@
8
8
  #include "duckdb/storage/metadata/metadata_writer.hpp"
9
9
  #include "duckdb/storage/buffer_manager.hpp"
10
10
  #include "duckdb/main/config.hpp"
11
+ #include "duckdb/main/database.hpp"
11
12
 
12
13
  #include <algorithm>
13
14
  #include <cstring>
@@ -16,12 +17,22 @@ namespace duckdb {
16
17
 
17
18
  const char MainHeader::MAGIC_BYTES[] = "DUCK";
18
19
 
20
+ void SerializeVersionNumber(WriteStream &ser, const string &version_str) {
21
+ constexpr const idx_t MAX_VERSION_SIZE = 32;
22
+ data_t version[MAX_VERSION_SIZE];
23
+ memset(version, 0, MAX_VERSION_SIZE);
24
+ memcpy(version, version_str.c_str(), MinValue<idx_t>(version_str.size(), MAX_VERSION_SIZE));
25
+ ser.WriteData(version, MAX_VERSION_SIZE);
26
+ }
27
+
19
28
  void MainHeader::Write(WriteStream &ser) {
20
29
  ser.WriteData(const_data_ptr_cast(MAGIC_BYTES), MAGIC_BYTE_SIZE);
21
30
  ser.Write<uint64_t>(version_number);
22
31
  for (idx_t i = 0; i < FLAG_COUNT; i++) {
23
32
  ser.Write<uint64_t>(flags[i]);
24
33
  }
34
+ SerializeVersionNumber(ser, DuckDB::LibraryVersion());
35
+ SerializeVersionNumber(ser, DuckDB::SourceID());
25
36
  }
26
37
 
27
38
  void MainHeader::CheckMagicBytes(FileHandle &handle) {
@@ -15,6 +15,9 @@ StructColumnData::StructColumnData(BlockManager &block_manager, DataTableInfo &i
15
15
  D_ASSERT(type.InternalType() == PhysicalType::STRUCT);
16
16
  auto &child_types = StructType::GetChildTypes(type);
17
17
  D_ASSERT(child_types.size() > 0);
18
+ if (type.id() != LogicalTypeId::UNION && StructType::IsUnnamed(type)) {
19
+ throw InvalidInputException("A table cannot be created from an unnamed struct");
20
+ }
18
21
  // the sub column index, starting at 1 (0 is the validity mask)
19
22
  idx_t sub_column_index = 1;
20
23
  for (auto &child_type : child_types) {
@@ -1,5 +1,7 @@
1
1
  #include "src/planner/bound_result_modifier.cpp"
2
2
 
3
+ #include "src/planner/bound_parameter_map.cpp"
4
+
3
5
  #include "src/planner/expression_iterator.cpp"
4
6
 
5
7
  #include "src/planner/expression.cpp"