duckdb 0.7.2-dev402.0 → 0.7.2-dev586.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/binding.gyp +9 -9
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +0 -1
  4. package/src/duckdb/extension/parquet/parquet-extension.cpp +1 -0
  5. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +8 -6
  6. package/src/duckdb/src/catalog/catalog.cpp +13 -0
  7. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -21
  8. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +8 -2
  9. package/src/duckdb/src/catalog/catalog_set.cpp +1 -0
  10. package/src/duckdb/src/common/arrow/arrow_appender.cpp +48 -4
  11. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
  12. package/src/duckdb/src/common/field_writer.cpp +1 -0
  13. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +4 -0
  14. package/src/duckdb/src/common/serializer/buffered_file_reader.cpp +15 -2
  15. package/src/duckdb/src/common/types.cpp +136 -53
  16. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +20 -40
  17. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +9 -1
  18. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +5 -8
  19. package/src/duckdb/src/function/aggregate/distributive/bool.cpp +2 -0
  20. package/src/duckdb/src/function/aggregate/distributive/count.cpp +1 -0
  21. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -0
  22. package/src/duckdb/src/function/aggregate/distributive/sum.cpp +8 -0
  23. package/src/duckdb/src/function/aggregate/holistic/quantile.cpp +15 -0
  24. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +42 -11
  25. package/src/duckdb/src/function/function_binder.cpp +1 -8
  26. package/src/duckdb/src/function/scalar/date/current.cpp +0 -2
  27. package/src/duckdb/src/function/table/arrow.cpp +3 -0
  28. package/src/duckdb/src/function/table/arrow_conversion.cpp +18 -0
  29. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  30. package/src/duckdb/src/function/table_function.cpp +11 -11
  31. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +3 -0
  32. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
  33. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +12 -4
  34. package/src/duckdb/src/include/duckdb/common/{http_stats.hpp → http_state.hpp} +18 -4
  35. package/src/duckdb/src/include/duckdb/common/serializer/buffered_deserializer.hpp +4 -2
  36. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_reader.hpp +8 -2
  37. package/src/duckdb/src/include/duckdb/common/serializer.hpp +13 -0
  38. package/src/duckdb/src/include/duckdb/common/types.hpp +27 -1
  39. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_file_handle.hpp +1 -0
  40. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -3
  41. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +6 -3
  42. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -6
  43. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +12 -1
  44. package/src/duckdb/src/include/duckdb/function/table_function.hpp +8 -0
  45. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -3
  46. package/src/duckdb/src/include/duckdb/main/connection_manager.hpp +2 -0
  47. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/optimizer/rule/list.hpp +1 -0
  49. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +24 -0
  50. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
  51. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +2 -2
  52. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +3 -0
  53. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +6 -0
  54. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -0
  55. package/src/duckdb/src/include/duckdb/planner/binder.hpp +4 -3
  56. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +3 -0
  57. package/src/duckdb/src/include/duckdb/planner/expression/bound_aggregate_expression.hpp +3 -0
  58. package/src/duckdb/src/include/duckdb/planner/expression_binder/order_binder.hpp +4 -1
  59. package/src/duckdb/src/include/duckdb/planner/operator/logical_distinct.hpp +3 -0
  60. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/storage/meta_block_reader.hpp +7 -0
  62. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +1 -1
  63. package/src/duckdb/src/main/client_context.cpp +30 -32
  64. package/src/duckdb/src/main/client_data.cpp +7 -6
  65. package/src/duckdb/src/main/database.cpp +9 -0
  66. package/src/duckdb/src/main/query_profiler.cpp +17 -15
  67. package/src/duckdb/src/optimizer/optimizer.cpp +1 -0
  68. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +30 -0
  69. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +9 -2
  70. package/src/duckdb/src/parser/expression/star_expression.cpp +6 -6
  71. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +7 -1
  72. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +17 -2
  73. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +45 -40
  74. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +7 -0
  75. package/src/duckdb/src/parser/transform/helpers/transform_orderby.cpp +0 -7
  76. package/src/duckdb/src/parser/transform/statement/transform_rename.cpp +3 -4
  77. package/src/duckdb/src/planner/bind_context.cpp +2 -25
  78. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +6 -4
  79. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +3 -2
  80. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +176 -0
  81. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +57 -82
  82. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +11 -0
  83. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  84. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +2 -2
  85. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +1 -1
  86. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +11 -1
  87. package/src/duckdb/src/planner/binder.cpp +12 -23
  88. package/src/duckdb/src/planner/bound_result_modifier.cpp +26 -0
  89. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +9 -2
  90. package/src/duckdb/src/planner/expression_iterator.cpp +5 -0
  91. package/src/duckdb/src/planner/logical_operator.cpp +4 -2
  92. package/src/duckdb/src/planner/logical_operator_visitor.cpp +5 -0
  93. package/src/duckdb/src/planner/operator/logical_distinct.cpp +3 -0
  94. package/src/duckdb/src/planner/planner.cpp +2 -1
  95. package/src/duckdb/src/storage/checkpoint_manager.cpp +8 -3
  96. package/src/duckdb/src/storage/meta_block_reader.cpp +22 -0
  97. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  98. package/src/duckdb/src/storage/wal_replay.cpp +8 -5
  99. package/src/duckdb/src/storage/write_ahead_log.cpp +2 -2
  100. package/src/duckdb/src/transaction/commit_state.cpp +11 -7
  101. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -1
  102. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +8145 -8317
  103. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  104. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  105. package/src/duckdb/ub_src_planner_binder_expression.cpp +2 -0
@@ -795,6 +795,7 @@ AggregateFunction GetTypedDiscreteQuantileListAggregateFunction(const LogicalTyp
795
795
  using STATE = QuantileState<SAVE_TYPE>;
796
796
  using OP = QuantileListOperation<INPUT_TYPE, true>;
797
797
  auto fun = QuantileListAggregate<STATE, INPUT_TYPE, list_entry_t, OP>(type, type);
798
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
798
799
  fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, list_entry_t, OP>;
799
800
  return fun;
800
801
  }
@@ -851,6 +852,7 @@ AggregateFunction GetTypedContinuousQuantileAggregateFunction(const LogicalType
851
852
  using STATE = QuantileState<INPUT_TYPE>;
852
853
  using OP = QuantileScalarOperation<false>;
853
854
  auto fun = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, TARGET_TYPE, OP>(input_type, target_type);
855
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
854
856
  fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, TARGET_TYPE, OP>;
855
857
  return fun;
856
858
  }
@@ -904,6 +906,7 @@ AggregateFunction GetTypedContinuousQuantileListAggregateFunction(const LogicalT
904
906
  using STATE = QuantileState<INPUT_TYPE>;
905
907
  using OP = QuantileListOperation<CHILD_TYPE, false>;
906
908
  auto fun = QuantileListAggregate<STATE, INPUT_TYPE, list_entry_t, OP>(input_type, result_type);
909
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
907
910
  fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, list_entry_t, OP>;
908
911
  return fun;
909
912
  }
@@ -1129,6 +1132,7 @@ AggregateFunction GetTypedMedianAbsoluteDeviationAggregateFunction(const Logical
1129
1132
  using STATE = QuantileState<INPUT_TYPE>;
1130
1133
  using OP = MedianAbsoluteDeviationOperation<MEDIAN_TYPE>;
1131
1134
  auto fun = AggregateFunction::UnaryAggregateDestructor<STATE, INPUT_TYPE, TARGET_TYPE, OP>(input_type, target_type);
1135
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1132
1136
  fun.window = AggregateFunction::UnaryWindow<STATE, INPUT_TYPE, TARGET_TYPE, OP>;
1133
1137
  return fun;
1134
1138
  }
@@ -1198,6 +1202,7 @@ unique_ptr<FunctionData> BindMedianDecimal(ClientContext &context, AggregateFunc
1198
1202
  function.name = "median";
1199
1203
  function.serialize = QuantileSerialize;
1200
1204
  function.deserialize = QuantileDeserialize;
1205
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1201
1206
  return bind_data;
1202
1207
  }
1203
1208
 
@@ -1205,6 +1210,7 @@ unique_ptr<FunctionData> BindMedianAbsoluteDeviationDecimal(ClientContext &conte
1205
1210
  vector<unique_ptr<Expression>> &arguments) {
1206
1211
  function = GetMedianAbsoluteDeviationAggregateFunction(arguments[0]->return_type);
1207
1212
  function.name = "mad";
1213
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1208
1214
  return nullptr;
1209
1215
  }
1210
1216
 
@@ -1257,6 +1263,7 @@ unique_ptr<FunctionData> BindDiscreteQuantileDecimal(ClientContext &context, Agg
1257
1263
  function.name = "quantile_disc";
1258
1264
  function.serialize = QuantileDecimalSerialize;
1259
1265
  function.deserialize = QuantileDeserialize;
1266
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1260
1267
  return bind_data;
1261
1268
  }
1262
1269
 
@@ -1267,6 +1274,7 @@ unique_ptr<FunctionData> BindDiscreteQuantileDecimalList(ClientContext &context,
1267
1274
  function.name = "quantile_disc";
1268
1275
  function.serialize = QuantileDecimalSerialize;
1269
1276
  function.deserialize = QuantileDeserialize;
1277
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1270
1278
  return bind_data;
1271
1279
  }
1272
1280
 
@@ -1277,6 +1285,7 @@ unique_ptr<FunctionData> BindContinuousQuantileDecimal(ClientContext &context, A
1277
1285
  function.name = "quantile_cont";
1278
1286
  function.serialize = QuantileDecimalSerialize;
1279
1287
  function.deserialize = QuantileDeserialize;
1288
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1280
1289
  return bind_data;
1281
1290
  }
1282
1291
 
@@ -1287,6 +1296,7 @@ unique_ptr<FunctionData> BindContinuousQuantileDecimalList(ClientContext &contex
1287
1296
  function.name = "quantile_cont";
1288
1297
  function.serialize = QuantileDecimalSerialize;
1289
1298
  function.deserialize = QuantileDeserialize;
1299
+ function.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1290
1300
  return bind_data;
1291
1301
  }
1292
1302
 
@@ -1316,6 +1326,7 @@ AggregateFunction GetDiscreteQuantileAggregate(const LogicalType &type) {
1316
1326
  fun.deserialize = QuantileDeserialize;
1317
1327
  // temporarily push an argument so we can bind the actual quantile
1318
1328
  fun.arguments.emplace_back(LogicalType::DOUBLE);
1329
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1319
1330
  return fun;
1320
1331
  }
1321
1332
 
@@ -1327,6 +1338,7 @@ AggregateFunction GetDiscreteQuantileListAggregate(const LogicalType &type) {
1327
1338
  // temporarily push an argument so we can bind the actual quantile
1328
1339
  auto list_of_double = LogicalType::LIST(LogicalType::DOUBLE);
1329
1340
  fun.arguments.push_back(list_of_double);
1341
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1330
1342
  return fun;
1331
1343
  }
1332
1344
 
@@ -1337,6 +1349,7 @@ AggregateFunction GetContinuousQuantileAggregate(const LogicalType &type) {
1337
1349
  fun.deserialize = QuantileDeserialize;
1338
1350
  // temporarily push an argument so we can bind the actual quantile
1339
1351
  fun.arguments.emplace_back(LogicalType::DOUBLE);
1352
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1340
1353
  return fun;
1341
1354
  }
1342
1355
 
@@ -1348,6 +1361,7 @@ AggregateFunction GetContinuousQuantileListAggregate(const LogicalType &type) {
1348
1361
  // temporarily push an argument so we can bind the actual quantile
1349
1362
  auto list_of_double = LogicalType::LIST(LogicalType::DOUBLE);
1350
1363
  fun.arguments.push_back(list_of_double);
1364
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1351
1365
  return fun;
1352
1366
  }
1353
1367
 
@@ -1357,6 +1371,7 @@ AggregateFunction GetQuantileDecimalAggregate(const vector<LogicalType> &argumen
1357
1371
  fun.bind = bind;
1358
1372
  fun.serialize = QuantileSerialize;
1359
1373
  fun.deserialize = QuantileDeserialize;
1374
+ fun.order_dependent = AggregateOrderDependent::NOT_ORDER_DEPENDENT;
1360
1375
  return fun;
1361
1376
  }
1362
1377
 
@@ -3,6 +3,9 @@
3
3
  #include "duckdb/common/types/column_data_collection.hpp"
4
4
  #include "duckdb/function/function_binder.hpp"
5
5
  #include "duckdb/storage/buffer_manager.hpp"
6
+ #include "duckdb/planner/expression/bound_aggregate_expression.hpp"
7
+ #include "duckdb/parser/expression_map.hpp"
8
+ #include "duckdb/function/aggregate/distributive_functions.hpp"
6
9
 
7
10
  namespace duckdb {
8
11
 
@@ -363,16 +366,44 @@ struct SortedAggregateFunction {
363
366
  }
364
367
  };
365
368
 
366
- unique_ptr<FunctionData> FunctionBinder::BindSortedAggregate(AggregateFunction &bound_function,
367
- vector<unique_ptr<Expression>> &children,
368
- unique_ptr<FunctionData> bind_info,
369
- unique_ptr<BoundOrderModifier> order_bys) {
370
-
371
- auto sorted_bind =
372
- make_unique<SortedAggregateBindData>(context, bound_function, children, std::move(bind_info), *order_bys);
369
+ void FunctionBinder::BindSortedAggregate(ClientContext &context, BoundAggregateExpression &expr,
370
+ const vector<unique_ptr<Expression>> &groups) {
371
+ if (!expr.order_bys || expr.order_bys->orders.empty() || expr.children.empty()) {
372
+ // not a sorted aggregate: return
373
+ return;
374
+ }
375
+ if (context.config.enable_optimizer) {
376
+ // for each ORDER BY - check if it is actually necessary
377
+ // expressions that are in the groups do not need to be ORDERED BY
378
+ // `ORDER BY` on a group has no effect, because for each aggregate, the group is unique
379
+ // similarly, we only need to ORDER BY each aggregate once
380
+ expression_set_t seen_expressions;
381
+ for (auto &target : groups) {
382
+ seen_expressions.insert(target.get());
383
+ }
384
+ vector<BoundOrderByNode> new_order_nodes;
385
+ for (auto &order_node : expr.order_bys->orders) {
386
+ if (seen_expressions.find(order_node.expression.get()) != seen_expressions.end()) {
387
+ // we do not need to order by this node
388
+ continue;
389
+ }
390
+ seen_expressions.insert(order_node.expression.get());
391
+ new_order_nodes.push_back(std::move(order_node));
392
+ }
393
+ if (new_order_nodes.empty()) {
394
+ expr.order_bys.reset();
395
+ return;
396
+ }
397
+ expr.order_bys->orders = std::move(new_order_nodes);
398
+ }
399
+ auto &bound_function = expr.function;
400
+ auto &children = expr.children;
401
+ auto &order_bys = *expr.order_bys;
402
+ auto sorted_bind = make_unique<SortedAggregateBindData>(context, bound_function, expr.children,
403
+ std::move(expr.bind_info), order_bys);
373
404
 
374
405
  // The arguments are the children plus the sort columns.
375
- for (auto &order : order_bys->orders) {
406
+ for (auto &order : order_bys.orders) {
376
407
  children.emplace_back(std::move(order.expression));
377
408
  }
378
409
 
@@ -392,9 +423,9 @@ unique_ptr<FunctionData> FunctionBinder::BindSortedAggregate(AggregateFunction &
392
423
  AggregateFunction::StateDestroy<SortedAggregateState, SortedAggregateFunction>, nullptr,
393
424
  SortedAggregateFunction::Window, SortedAggregateFunction::Serialize, SortedAggregateFunction::Deserialize);
394
425
 
395
- bound_function = std::move(ordered_aggregate);
396
-
397
- return std::move(sorted_bind);
426
+ expr.function = std::move(ordered_aggregate);
427
+ expr.bind_info = std::move(sorted_bind);
428
+ expr.order_bys.reset();
398
429
  }
399
430
 
400
431
  } // namespace duckdb
@@ -294,8 +294,7 @@ unique_ptr<BoundFunctionExpression> FunctionBinder::BindScalarFunction(ScalarFun
294
294
  unique_ptr<BoundAggregateExpression> FunctionBinder::BindAggregateFunction(AggregateFunction bound_function,
295
295
  vector<unique_ptr<Expression>> children,
296
296
  unique_ptr<Expression> filter,
297
- AggregateType aggr_type,
298
- unique_ptr<BoundOrderModifier> order_bys) {
297
+ AggregateType aggr_type) {
299
298
  unique_ptr<FunctionData> bind_info;
300
299
  if (bound_function.bind) {
301
300
  bind_info = bound_function.bind(context, bound_function, children);
@@ -306,12 +305,6 @@ unique_ptr<BoundAggregateExpression> FunctionBinder::BindAggregateFunction(Aggre
306
305
  // check if we need to add casts to the children
307
306
  CastToFunctionArguments(bound_function, children);
308
307
 
309
- // Special case: for ORDER BY aggregates, we wrap the aggregate function in a SortedAggregateFunction
310
- // The children are the sort clauses and the binding contains the ordering data.
311
- if (order_bys && !order_bys->orders.empty()) {
312
- bind_info = BindSortedAggregate(bound_function, children, std::move(bind_info), std::move(order_bys));
313
- }
314
-
315
308
  return make_unique<BoundAggregateExpression>(std::move(bound_function), std::move(children), std::move(filter),
316
309
  std::move(bind_info), aggr_type);
317
310
  }
@@ -35,14 +35,12 @@ static void CurrentTimestampFunction(DataChunk &input, ExpressionState &state, V
35
35
 
36
36
  void CurrentTimeFun::RegisterFunction(BuiltinFunctions &set) {
37
37
  ScalarFunction current_time("get_current_time", {}, LogicalType::TIME, CurrentTimeFunction);
38
- ;
39
38
  current_time.side_effects = FunctionSideEffects::HAS_SIDE_EFFECTS;
40
39
  set.AddFunction(current_time);
41
40
  }
42
41
 
43
42
  void CurrentDateFun::RegisterFunction(BuiltinFunctions &set) {
44
43
  ScalarFunction current_date({}, LogicalType::DATE, CurrentDateFunction);
45
- ;
46
44
  current_date.side_effects = FunctionSideEffects::HAS_SIDE_EFFECTS;
47
45
  set.AddFunction({"today", "current_date"}, current_date);
48
46
  }
@@ -101,6 +101,9 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
101
101
  } else if (format == "tiM") {
102
102
  arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MONTHS);
103
103
  return LogicalType::INTERVAL;
104
+ } else if (format == "tin") {
105
+ arrow_convert_data[col_idx]->date_time_precision.emplace_back(ArrowDateTimeType::MONTH_DAY_NANO);
106
+ return LogicalType::INTERVAL;
104
107
  } else if (format == "+l") {
105
108
  arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
106
109
  auto child_type = GetArrowLogicalType(*schema.children[0], arrow_convert_data, col_idx);
@@ -316,6 +316,20 @@ void IntervalConversionMonths(Vector &vector, ArrowArray &array, ArrowScanLocalS
316
316
  }
317
317
  }
318
318
 
319
+ void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
320
+ int64_t nested_offset, idx_t size) {
321
+ auto tgt_ptr = (interval_t *)FlatVector::GetData(vector);
322
+ auto src_ptr = (ArrowInterval *)array.buffers[1] + scan_state.chunk_offset + array.offset;
323
+ if (nested_offset != -1) {
324
+ src_ptr = (ArrowInterval *)array.buffers[1] + nested_offset + array.offset;
325
+ }
326
+ for (idx_t row = 0; row < size; row++) {
327
+ tgt_ptr[row].days = src_ptr[row].days;
328
+ tgt_ptr[row].micros = src_ptr[row].nanoseconds / Interval::NANOS_PER_MICRO;
329
+ tgt_ptr[row].months = src_ptr[row].months;
330
+ }
331
+ }
332
+
319
333
  void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
320
334
  std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
321
335
  std::pair<idx_t, idx_t> &arrow_convert_idx, int64_t nested_offset, ValidityMask *parent_mask) {
@@ -509,6 +523,10 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
509
523
  IntervalConversionMonths(vector, array, scan_state, nested_offset, size);
510
524
  break;
511
525
  }
526
+ case ArrowDateTimeType::MONTH_DAY_NANO: {
527
+ IntervalConversionMonthDayNanos(vector, array, scan_state, nested_offset, size);
528
+ break;
529
+ }
512
530
  default:
513
531
  throw std::runtime_error("Unsupported precision for Interval/Duration Type ");
514
532
  }
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev402"
2
+ #define DUCKDB_VERSION "0.7.2-dev586"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "ab9736bed0"
5
+ #define DUCKDB_SOURCE_ID "23ee8b036a"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -14,12 +14,12 @@ TableFunctionInfo::~TableFunctionInfo() {
14
14
  TableFunction::TableFunction(string name, vector<LogicalType> arguments, table_function_t function,
15
15
  table_function_bind_t bind, table_function_init_global_t init_global,
16
16
  table_function_init_local_t init_local)
17
- : SimpleNamedParameterFunction(std::move(name), std::move(arguments)), bind(bind), init_global(init_global),
18
- init_local(init_local), function(function), in_out_function(nullptr), in_out_function_final(nullptr),
19
- statistics(nullptr), dependency(nullptr), cardinality(nullptr), pushdown_complex_filter(nullptr),
20
- to_string(nullptr), table_scan_progress(nullptr), get_batch_index(nullptr), get_batch_info(nullptr),
21
- serialize(nullptr), deserialize(nullptr), projection_pushdown(false), filter_pushdown(false),
22
- filter_prune(false) {
17
+ : SimpleNamedParameterFunction(std::move(name), std::move(arguments)), bind(bind), bind_replace(nullptr),
18
+ init_global(init_global), init_local(init_local), function(function), in_out_function(nullptr),
19
+ in_out_function_final(nullptr), statistics(nullptr), dependency(nullptr), cardinality(nullptr),
20
+ pushdown_complex_filter(nullptr), to_string(nullptr), table_scan_progress(nullptr), get_batch_index(nullptr),
21
+ get_batch_info(nullptr), serialize(nullptr), deserialize(nullptr), projection_pushdown(false),
22
+ filter_pushdown(false), filter_prune(false) {
23
23
  }
24
24
 
25
25
  TableFunction::TableFunction(const vector<LogicalType> &arguments, table_function_t function,
@@ -28,11 +28,11 @@ TableFunction::TableFunction(const vector<LogicalType> &arguments, table_functio
28
28
  : TableFunction(string(), arguments, function, bind, init_global, init_local) {
29
29
  }
30
30
  TableFunction::TableFunction()
31
- : SimpleNamedParameterFunction("", {}), bind(nullptr), init_global(nullptr), init_local(nullptr), function(nullptr),
32
- in_out_function(nullptr), statistics(nullptr), dependency(nullptr), cardinality(nullptr),
33
- pushdown_complex_filter(nullptr), to_string(nullptr), table_scan_progress(nullptr), get_batch_index(nullptr),
34
- get_batch_info(nullptr), serialize(nullptr), deserialize(nullptr), projection_pushdown(false),
35
- filter_pushdown(false), filter_prune(false) {
31
+ : SimpleNamedParameterFunction("", {}), bind(nullptr), bind_replace(nullptr), init_global(nullptr),
32
+ init_local(nullptr), function(nullptr), in_out_function(nullptr), statistics(nullptr), dependency(nullptr),
33
+ cardinality(nullptr), pushdown_complex_filter(nullptr), to_string(nullptr), table_scan_progress(nullptr),
34
+ get_batch_index(nullptr), get_batch_info(nullptr), serialize(nullptr), deserialize(nullptr),
35
+ projection_pushdown(false), filter_pushdown(false), filter_prune(false) {
36
36
  }
37
37
 
38
38
  bool TableFunction::Equal(const TableFunction &rhs) const {
@@ -205,6 +205,9 @@ public:
205
205
  DUCKDB_API static LogicalType GetType(ClientContext &context, const string &catalog_name, const string &schema,
206
206
  const string &name);
207
207
 
208
+ static bool TypeExists(ClientContext &context, const string &catalog_name, const string &schema,
209
+ const string &name);
210
+
208
211
  template <class T>
209
212
  T *GetEntry(ClientContext &context, const string &schema_name, const string &name, bool if_exists = false,
210
213
  QueryErrorContext error_context = QueryErrorContext()) {
@@ -35,7 +35,7 @@ public:
35
35
 
36
36
  void SetAsRoot() override;
37
37
 
38
- void CommitAlter(AlterInfo &info);
38
+ void CommitAlter(string &column_name);
39
39
  void CommitDrop();
40
40
 
41
41
  TableFunction GetScanFunction(ClientContext &context, unique_ptr<FunctionData> &bind_data) override;
@@ -25,7 +25,7 @@ struct IndexWriteOperation {
25
25
 
26
26
  class FieldWriter {
27
27
  public:
28
- DUCKDB_API FieldWriter(Serializer &serializer);
28
+ DUCKDB_API explicit FieldWriter(Serializer &serializer);
29
29
  DUCKDB_API ~FieldWriter();
30
30
 
31
31
  public:
@@ -128,11 +128,11 @@ public:
128
128
  return *buffer;
129
129
  }
130
130
 
131
- private:
132
131
  void AddField() {
133
132
  field_count++;
134
133
  }
135
134
 
135
+ private:
136
136
  template <class T>
137
137
  void Write(const T &element) {
138
138
  WriteData((const_data_ptr_t)&element, sizeof(T));
@@ -152,7 +152,7 @@ DUCKDB_API void FieldWriter::Write(const string &val);
152
152
 
153
153
  class FieldDeserializer : public Deserializer {
154
154
  public:
155
- FieldDeserializer(Deserializer &root);
155
+ explicit FieldDeserializer(Deserializer &root);
156
156
 
157
157
  public:
158
158
  void ReadData(data_ptr_t buffer, idx_t read_size) override;
@@ -163,6 +163,14 @@ public:
163
163
  return root;
164
164
  }
165
165
 
166
+ ClientContext &GetContext() override {
167
+ return root.GetContext();
168
+ }
169
+
170
+ Catalog *GetCatalog() override {
171
+ return root.GetCatalog();
172
+ }
173
+
166
174
  private:
167
175
  Deserializer &root;
168
176
  idx_t remaining_data;
@@ -177,7 +185,7 @@ struct IndexReadOperation {
177
185
 
178
186
  class FieldReader {
179
187
  public:
180
- DUCKDB_API FieldReader(Deserializer &source);
188
+ DUCKDB_API explicit FieldReader(Deserializer &source);
181
189
  DUCKDB_API ~FieldReader();
182
190
 
183
191
  public:
@@ -1,7 +1,7 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/common/http_stats.hpp
4
+ // duckdb/common/http_state.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
@@ -15,7 +15,16 @@
15
15
 
16
16
  namespace duckdb {
17
17
 
18
- class HTTPStats {
18
+ struct CachedFile {
19
+ //! Cached Data
20
+ shared_ptr<char> data;
21
+ //! Data capacity
22
+ uint64_t capacity = 0;
23
+ //! If we finished downloading the file
24
+ bool finished = false;
25
+ };
26
+
27
+ class HTTPState {
19
28
  public:
20
29
  atomic<idx_t> head_count {0};
21
30
  atomic<idx_t> get_count {0};
@@ -23,6 +32,10 @@ public:
23
32
  atomic<idx_t> post_count {0};
24
33
  atomic<idx_t> total_bytes_received {0};
25
34
  atomic<idx_t> total_bytes_sent {0};
35
+ //! Mutex to lock when getting the cached file(Parallel Only)
36
+ mutex cached_files_mutex;
37
+ //! In case of fully downloading the file, the cached files of this query
38
+ unordered_map<string, CachedFile> cached_files;
26
39
 
27
40
  void Reset() {
28
41
  head_count = 0;
@@ -31,13 +44,14 @@ public:
31
44
  post_count = 0;
32
45
  total_bytes_received = 0;
33
46
  total_bytes_sent = 0;
47
+ cached_files.clear();
34
48
  }
35
49
 
36
50
  //! helper function to get the HTTP
37
- static HTTPStats *TryGetStats(FileOpener *opener) {
51
+ static HTTPState *TryGetState(FileOpener *opener) {
38
52
  auto client_context = FileOpener::TryGetClientContext(opener);
39
53
  if (client_context) {
40
- return client_context->client_data->http_stats.get();
54
+ return client_context->client_data->http_state.get();
41
55
  }
42
56
  return nullptr;
43
57
  }
@@ -26,14 +26,16 @@ public:
26
26
  void ReadData(data_ptr_t buffer, uint64_t read_size) override;
27
27
  };
28
28
 
29
- class BufferentContextDeserializer : public BufferedDeserializer {
29
+ class BufferedContextDeserializer : public BufferedDeserializer {
30
30
  public:
31
- BufferentContextDeserializer(ClientContext &context_p, data_ptr_t ptr, idx_t data_size)
31
+ BufferedContextDeserializer(ClientContext &context_p, data_ptr_t ptr, idx_t data_size)
32
32
  : BufferedDeserializer(ptr, data_size), context(context_p) {
33
33
  }
34
34
 
35
35
  public:
36
36
  ClientContext &context;
37
+
38
+ ClientContext &GetContext() override;
37
39
  };
38
40
 
39
41
  } // namespace duckdb
@@ -14,14 +14,16 @@ namespace duckdb {
14
14
 
15
15
  class BufferedFileReader : public Deserializer {
16
16
  public:
17
- BufferedFileReader(FileSystem &fs, const char *path, FileLockType lock_type = FileLockType::READ_LOCK,
18
- FileOpener *opener = nullptr);
17
+ BufferedFileReader(FileSystem &fs, const char *path, ClientContext *context,
18
+ FileLockType lock_type = FileLockType::READ_LOCK, FileOpener *opener = nullptr);
19
19
 
20
20
  FileSystem &fs;
21
21
  unique_ptr<data_t[]> data;
22
22
  idx_t offset;
23
23
  idx_t read_data;
24
24
  unique_ptr<FileHandle> handle;
25
+ ClientContext *context;
26
+ Catalog *catalog = nullptr;
25
27
 
26
28
  public:
27
29
  void ReadData(data_ptr_t buffer, uint64_t read_size) override;
@@ -35,6 +37,10 @@ public:
35
37
  void Seek(uint64_t location);
36
38
  uint64_t CurrentOffset();
37
39
 
40
+ ClientContext &GetContext() override;
41
+
42
+ Catalog *GetCatalog() override;
43
+
38
44
  private:
39
45
  idx_t file_size;
40
46
  idx_t total_read;
@@ -8,6 +8,7 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include "duckdb/catalog/catalog.hpp"
11
12
  #include "duckdb/common/common.hpp"
12
13
  #include "duckdb/common/exception.hpp"
13
14
  #include "duckdb/common/vector.hpp"
@@ -21,6 +22,8 @@ private:
21
22
  uint64_t version = 0L;
22
23
 
23
24
  public:
25
+ bool is_query_plan = false;
26
+
24
27
  virtual ~Serializer() {
25
28
  }
26
29
 
@@ -111,6 +114,16 @@ public:
111
114
  //! Reads [read_size] bytes into the buffer
112
115
  virtual void ReadData(data_ptr_t buffer, idx_t read_size) = 0;
113
116
 
117
+ //! Gets the context for the deserializer
118
+ virtual ClientContext &GetContext() {
119
+ throw InternalException("This deserializer does not have a client-context");
120
+ };
121
+
122
+ //! Gets the catalog for the deserializer
123
+ virtual Catalog *GetCatalog() {
124
+ return nullptr;
125
+ };
126
+
114
127
  template <class T>
115
128
  T Read() {
116
129
  T value;
@@ -23,6 +23,20 @@ class Value;
23
23
  class TypeCatalogEntry;
24
24
  class Vector;
25
25
  class ClientContext;
26
+ class FieldWriter;
27
+
28
+ //! Extra Type Info Type
29
+ enum class ExtraTypeInfoType : uint8_t {
30
+ INVALID_TYPE_INFO = 0,
31
+ GENERIC_TYPE_INFO = 1,
32
+ DECIMAL_TYPE_INFO = 2,
33
+ STRING_TYPE_INFO = 3,
34
+ LIST_TYPE_INFO = 4,
35
+ STRUCT_TYPE_INFO = 5,
36
+ ENUM_TYPE_INFO = 6,
37
+ USER_TYPE_INFO = 7,
38
+ AGGREGATE_STATE_TYPE_INFO = 8
39
+ };
26
40
 
27
41
  struct hugeint_t {
28
42
  public:
@@ -297,6 +311,11 @@ struct LogicalType {
297
311
  inline const ExtraTypeInfo *AuxInfo() const {
298
312
  return type_info_.get();
299
313
  }
314
+
315
+ inline shared_ptr<ExtraTypeInfo> GetAuxInfoShrPtr() const {
316
+ return type_info_;
317
+ }
318
+
300
319
  inline void CopyAuxInfo(const LogicalType& other) {
301
320
  type_info_ = other.type_info_;
302
321
  }
@@ -324,6 +343,9 @@ struct LogicalType {
324
343
 
325
344
  //! Serializes a LogicalType to a stand-alone binary blob
326
345
  DUCKDB_API void Serialize(Serializer &serializer) const;
346
+
347
+ DUCKDB_API void SerializeEnumType(Serializer &serializer) const;
348
+
327
349
  //! Deserializes a blob back into a LogicalType
328
350
  DUCKDB_API static LogicalType Deserialize(Deserializer &source);
329
351
 
@@ -349,6 +371,8 @@ struct LogicalType {
349
371
  DUCKDB_API static void SetCatalog(LogicalType &type, TypeCatalogEntry* catalog_entry);
350
372
  DUCKDB_API static TypeCatalogEntry* GetCatalog(const LogicalType &type);
351
373
 
374
+ DUCKDB_API static ExtraTypeInfoType GetExtraTypeInfoType(const ExtraTypeInfo &type);
375
+
352
376
  //! Gets the decimal properties of a numeric type. Fails if the type is not numeric.
353
377
  DUCKDB_API bool GetDecimalProperties(uint8_t &width, uint8_t &scale) const;
354
378
 
@@ -441,12 +465,14 @@ struct UserType{
441
465
  struct EnumType{
442
466
  DUCKDB_API static const string &GetTypeName(const LogicalType &type);
443
467
  DUCKDB_API static int64_t GetPos(const LogicalType &type, const string_t& key);
444
- DUCKDB_API static Vector &GetValuesInsertOrder(const LogicalType &type);
468
+ DUCKDB_API static const Vector &GetValuesInsertOrder(const LogicalType &type);
445
469
  DUCKDB_API static idx_t GetSize(const LogicalType &type);
446
470
  DUCKDB_API static const string GetValue(const Value &val);
447
471
  DUCKDB_API static void SetCatalog(LogicalType &type, TypeCatalogEntry* catalog_entry);
448
472
  DUCKDB_API static TypeCatalogEntry* GetCatalog(const LogicalType &type);
473
+ DUCKDB_API static string GetSchemaName(const LogicalType &type);
449
474
  DUCKDB_API static PhysicalType GetPhysicalType(const LogicalType &type);
475
+ DUCKDB_API static void Serialize(FieldWriter& writer, const ExtraTypeInfo& type_info, bool serialize_internals);
450
476
  };
451
477
 
452
478
  struct StructType {
@@ -87,6 +87,7 @@ public:
87
87
  // we have data left to read from the file
88
88
  // read directly into the buffer
89
89
  auto bytes_read = file_handle->Read((char *)buffer + result_offset, nr_bytes - result_offset);
90
+ file_size = file_handle->GetFileSize();
90
91
  read_position += bytes_read;
91
92
  if (reset_enabled) {
92
93
  // if reset caching is enabled, we need to cache the bytes that we have read
@@ -88,9 +88,6 @@ protected:
88
88
  unique_ptr<PhysicalOperator> CreatePlan(LogicalRecursiveCTE &op);
89
89
  unique_ptr<PhysicalOperator> CreatePlan(LogicalCTERef &op);
90
90
 
91
- unique_ptr<PhysicalOperator> CreateDistinctOn(unique_ptr<PhysicalOperator> child,
92
- vector<unique_ptr<Expression>> distinct_targets);
93
-
94
91
  unique_ptr<PhysicalOperator> ExtractAggregateExpressions(unique_ptr<PhysicalOperator> child,
95
92
  vector<unique_ptr<Expression>> &expressions,
96
93
  vector<unique_ptr<Expression>> &groups);
@@ -17,8 +17,9 @@
17
17
 
18
18
  namespace duckdb {
19
19
 
20
- //! This allows us to use the & operator to check if the type is contained in the set
21
20
  enum class AggregateType : uint8_t { NON_DISTINCT = 1, DISTINCT = 2 };
21
+ //! Whether or not the input order influences the result of the aggregate
22
+ enum class AggregateOrderDependent : uint8_t { ORDER_DEPENDENT = 1, NOT_ORDER_DEPENDENT = 2 };
22
23
 
23
24
  class BoundAggregateExpression;
24
25
 
@@ -92,7 +93,7 @@ public:
92
93
  LogicalType(LogicalTypeId::INVALID), null_handling),
93
94
  state_size(state_size), initialize(initialize), update(update), combine(combine), finalize(finalize),
94
95
  simple_update(simple_update), window(window), bind(bind), destructor(destructor), statistics(statistics),
95
- serialize(serialize), deserialize(deserialize) {
96
+ serialize(serialize), deserialize(deserialize), order_dependent(AggregateOrderDependent::ORDER_DEPENDENT) {
96
97
  }
97
98
 
98
99
  DUCKDB_API
@@ -107,7 +108,7 @@ public:
107
108
  LogicalType(LogicalTypeId::INVALID)),
108
109
  state_size(state_size), initialize(initialize), update(update), combine(combine), finalize(finalize),
109
110
  simple_update(simple_update), window(window), bind(bind), destructor(destructor), statistics(statistics),
110
- serialize(serialize), deserialize(deserialize) {
111
+ serialize(serialize), deserialize(deserialize), order_dependent(AggregateOrderDependent::ORDER_DEPENDENT) {
111
112
  }
112
113
 
113
114
  DUCKDB_API AggregateFunction(const vector<LogicalType> &arguments, const LogicalType &return_type,
@@ -160,6 +161,8 @@ public:
160
161
 
161
162
  aggregate_serialize_t serialize;
162
163
  aggregate_deserialize_t deserialize;
164
+ //! Whether or not the aggregate is order dependent
165
+ AggregateOrderDependent order_dependent;
163
166
 
164
167
  DUCKDB_API bool operator==(const AggregateFunction &rhs) const {
165
168
  return state_size == rhs.state_size && initialize == rhs.initialize && update == rhs.update &&