duckdb 1.2.1-dev4.0 → 1.2.1-dev8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/package.json +1 -1
  2. package/src/connection.cpp +57 -35
  3. package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
  4. package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
  5. package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
  6. package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
  7. package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
  8. package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
  9. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
  10. package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
  11. package/src/duckdb/extension/json/json_extension.cpp +8 -3
  12. package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
  13. package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
  14. package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
  15. package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
  16. package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
  17. package/src/duckdb/src/catalog/catalog.cpp +12 -0
  18. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  19. package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
  20. package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
  21. package/src/duckdb/src/common/bind_helpers.cpp +3 -0
  22. package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
  23. package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
  24. package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
  25. package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
  26. package/src/duckdb/src/execution/index/art/art.cpp +19 -6
  27. package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
  28. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
  29. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
  30. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
  31. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
  32. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
  33. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
  34. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
  35. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
  36. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
  37. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
  38. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
  39. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
  40. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
  41. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
  42. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
  43. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
  44. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
  45. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
  46. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
  47. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
  48. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
  49. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  50. package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
  51. package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
  52. package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
  53. package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
  54. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
  56. package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
  58. package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
  59. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
  60. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
  61. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
  63. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
  65. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
  66. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
  67. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
  68. package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
  69. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
  70. package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
  71. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
  72. package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
  74. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
  75. package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
  76. package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
  77. package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
  78. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
  79. package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
  80. package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
  81. package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
  82. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
  84. package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
  85. package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
  86. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
  87. package/src/duckdb/src/include/duckdb.h +495 -480
  88. package/src/duckdb/src/main/attached_database.cpp +1 -1
  89. package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
  90. package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
  91. package/src/duckdb/src/main/config.cpp +7 -1
  92. package/src/duckdb/src/main/database.cpp +8 -8
  93. package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
  94. package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
  95. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
  96. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
  97. package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
  98. package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
  99. package/src/duckdb/src/parallel/executor_task.cpp +10 -6
  100. package/src/duckdb/src/parallel/task_executor.cpp +4 -1
  101. package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
  102. package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
  103. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
  104. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
  105. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
  106. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
  107. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
  108. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
  109. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  110. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
  111. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  112. package/src/duckdb/src/storage/storage_manager.cpp +2 -2
  113. package/src/duckdb/src/storage/table/row_group.cpp +5 -6
  114. package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
  115. package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
  116. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
  117. package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
  118. package/src/duckdb/ub_src_parallel.cpp +2 -0
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "1.2.1-dev4.0",
5
+ "version": "1.2.1-dev8.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -17,12 +17,12 @@ Napi::FunctionReference Connection::Init(Napi::Env env, Napi::Object exports) {
17
17
  Napi::HandleScope scope(env);
18
18
 
19
19
  Napi::Function t = DefineClass(
20
- env, "Connection",
21
- {InstanceMethod("prepare", &Connection::Prepare), InstanceMethod("exec", &Connection::Exec),
22
- InstanceMethod("register_udf_bulk", &Connection::RegisterUdf),
23
- InstanceMethod("register_buffer", &Connection::RegisterBuffer),
24
- InstanceMethod("unregister_udf", &Connection::UnregisterUdf), InstanceMethod("close", &Connection::Close),
25
- InstanceMethod("unregister_buffer", &Connection::UnRegisterBuffer)});
20
+ env, "Connection",
21
+ {InstanceMethod("prepare", &Connection::Prepare), InstanceMethod("exec", &Connection::Exec),
22
+ InstanceMethod("register_udf_bulk", &Connection::RegisterUdf),
23
+ InstanceMethod("register_buffer", &Connection::RegisterBuffer),
24
+ InstanceMethod("unregister_udf", &Connection::UnregisterUdf), InstanceMethod("close", &Connection::Close),
25
+ InstanceMethod("unregister_buffer", &Connection::UnRegisterBuffer)});
26
26
 
27
27
  exports.Set("Connection", t);
28
28
 
@@ -234,14 +234,14 @@ void DuckDBNodeUDFLauncher(Napi::Env env, Napi::Function jsudf, std::nullptr_t *
234
234
 
235
235
  struct RegisterUdfTask : public Task {
236
236
  RegisterUdfTask(Connection &connection, std::string name, std::string return_type_name, Napi::Function callback)
237
- : Task(connection, callback), name(std::move(name)), return_type_name(std::move(return_type_name)) {
237
+ : Task(connection, callback), name(std::move(name)), return_type_name(std::move(return_type_name)) {
238
238
  }
239
239
 
240
240
  void DoWork() override {
241
241
  auto &connection = Get<Connection>();
242
242
  auto &udf_ptr = connection.udfs[name];
243
243
  duckdb::scalar_function_t udf_function = [&udf_ptr](duckdb::DataChunk &args, duckdb::ExpressionState &state,
244
- duckdb::Vector &result) -> void {
244
+ duckdb::Vector &result) -> void {
245
245
  // here we can do only DuckDB stuff because we do not have a functioning env
246
246
 
247
247
  // Flatten all args to simplify udfs
@@ -271,7 +271,7 @@ struct RegisterUdfTask : public Task {
271
271
  auto return_type = cast.cast_type;
272
272
 
273
273
  connection.connection->CreateVectorizedFunction(name, vector<duckdb::LogicalType> {}, return_type, udf_function,
274
- duckdb::LogicalType::ANY);
274
+ duckdb::LogicalType::ANY);
275
275
  }
276
276
  std::string name;
277
277
  std::string return_type_name;
@@ -296,7 +296,7 @@ Napi::Value Connection::RegisterUdf(const Napi::CallbackInfo &info) {
296
296
  }
297
297
 
298
298
  auto udf = duckdb_node_udf_function_t::New(env, udf_callback, "duckdb_node_udf" + name, 0, 1, nullptr,
299
- [](Napi::Env, void *, std::nullptr_t *ctx) {});
299
+ [](Napi::Env, void *, std::nullptr_t *ctx) {});
300
300
 
301
301
  // we have to unref the udf because otherwise there is a circular ref with the connection somehow(?)
302
302
  // this took far too long to figure out
@@ -304,14 +304,14 @@ Napi::Value Connection::RegisterUdf(const Napi::CallbackInfo &info) {
304
304
  udfs[name] = udf;
305
305
 
306
306
  database_ref->Schedule(info.Env(),
307
- duckdb::make_uniq<RegisterUdfTask>(*this, name, return_type_name, completion_callback));
307
+ duckdb::make_uniq<RegisterUdfTask>(*this, name, return_type_name, completion_callback));
308
308
 
309
309
  return Value();
310
310
  }
311
311
 
312
312
  struct UnregisterUdfTask : public Task {
313
313
  UnregisterUdfTask(Connection &connection, std::string name, Napi::Function callback)
314
- : Task(connection, callback), name(std::move(name)) {
314
+ : Task(connection, callback), name(std::move(name)) {
315
315
  }
316
316
 
317
317
  void DoWork() override {
@@ -354,7 +354,7 @@ Napi::Value Connection::UnregisterUdf(const Napi::CallbackInfo &info) {
354
354
 
355
355
  struct ExecTask : public Task {
356
356
  ExecTask(Connection &connection, std::string sql, Napi::Function callback)
357
- : Task(connection, callback), sql(std::move(sql)) {
357
+ : Task(connection, callback), sql(std::move(sql)) {
358
358
  }
359
359
 
360
360
  void DoWork() override {
@@ -395,8 +395,8 @@ struct ExecTask : public Task {
395
395
 
396
396
  struct ExecTaskWithCallback : public ExecTask {
397
397
  ExecTaskWithCallback(Connection &connection, std::string sql, Napi::Function js_callback,
398
- std::function<void(void)> cpp_callback)
399
- : ExecTask(connection, sql, js_callback), cpp_callback(cpp_callback) {
398
+ std::function<void(void)> cpp_callback)
399
+ : ExecTask(connection, sql, js_callback), cpp_callback(cpp_callback) {
400
400
  }
401
401
 
402
402
  void Callback() override {
@@ -456,24 +456,41 @@ Napi::Value Connection::Exec(const Napi::CallbackInfo &info) {
456
456
  }
457
457
 
458
458
  struct CreateArrowViewTask : public Task {
459
- CreateArrowViewTask(Connection &connection, duckdb::vector<duckdb::Value>& parameters, std::string &view_name)
460
- : Task(connection), parameters(parameters), view_name(view_name) {
459
+ CreateArrowViewTask(Connection &connection, duckdb::vector<duckdb::Value>& parameters, std::string &view_name, Napi::Function callback)
460
+ : Task(connection, callback), parameters(parameters), view_name(view_name) {
461
461
  }
462
462
 
463
463
  void DoWork() override {
464
464
  auto &connection = Get<Connection>();
465
- auto &con = *connection.connection;
466
- // Now we create a table function relation
467
- auto table_function_relation = duckdb::make_shared_ptr<duckdb::TableFunctionRelation>(con.context,"scan_arrow_ipc",parameters);
468
- // Creates a relation for a temporary view that does replace
469
- auto view_relation = table_function_relation->CreateView(view_name,true,true);
470
-
471
- view_relation->Execute();
472
-
465
+ success = true;
466
+ try {
467
+ auto &con = *connection.connection;
468
+ // Now we create a table function relation
469
+ auto table_function_relation = duckdb::make_shared_ptr<duckdb::TableFunctionRelation>(con.context,"scan_arrow_ipc",parameters);
470
+ // Creates a relation for a temporary view that does replace
471
+ auto view_relation = table_function_relation->CreateView(view_name,true,true);
472
+ auto res = view_relation->Execute();
473
+ if (res->HasError()) {
474
+ success = false;
475
+ error = res->GetErrorObject();
476
+ }
477
+ } catch (duckdb::Exception &e) {
478
+ success = false;
479
+ error = duckdb::ErrorData(e);
480
+ return;
481
+ }
473
482
  }
474
483
 
484
+ void Callback() override {
485
+ auto env = object.Env();
486
+ Napi::HandleScope scope(env);
487
+ callback.Value().MakeCallback(object.Value(), {success ? env.Null() : Utils::CreateError(env, error)});
488
+ };
489
+
475
490
  duckdb::vector<duckdb::Value> parameters;
476
491
  std::string view_name;
492
+ bool success;
493
+ duckdb::ErrorData error;
477
494
  };
478
495
 
479
496
  // Register Arrow IPC buffers for scanning from DuckDB
@@ -512,20 +529,25 @@ Napi::Value Connection::RegisterBuffer(const Napi::CallbackInfo &info) {
512
529
  Napi::Uint8Array arr = v.As<Napi::Uint8Array>();
513
530
  auto raw_ptr = reinterpret_cast<uint64_t>(arr.ArrayBuffer().Data());
514
531
  auto length = (uint64_t)arr.ElementLength();
515
- duckdb::child_list_t<duckdb::Value> buffer_values;
516
- // This is a little bit evil, but allows us to support both libraries in between 1.2 and 1.3
517
- if (db.ExtensionIsLoaded("nanoarrow")){
518
- buffer_values.push_back({"ptr", duckdb::Value::POINTER(raw_ptr)});
519
- } else {
520
- buffer_values.push_back({"ptr", duckdb::Value::UBIGINT(raw_ptr)});
521
- }
532
+ duckdb::child_list_t<duckdb::Value> buffer_values;
533
+ // This is a little bit evil, but allows us to support both libraries in between 1.2 and 1.3
534
+ if (db.ExtensionIsLoaded("nanoarrow")){
535
+ buffer_values.push_back({"ptr", duckdb::Value::POINTER(raw_ptr)});
536
+ } else {
537
+ buffer_values.push_back({"ptr", duckdb::Value::UBIGINT(raw_ptr)});
538
+ }
522
539
  buffer_values.push_back({"size", duckdb::Value::UBIGINT(length)});
523
540
  values.push_back(duckdb::Value::STRUCT(buffer_values));
524
541
  }
525
542
  duckdb::vector<duckdb::Value> list_value;
526
- list_value.push_back(duckdb::Value::LIST(values));
543
+ list_value.push_back(duckdb::Value::LIST(values));
544
+
545
+ Napi::Function callback;
546
+ if (info.Length() > 3 && info[3].IsFunction()) {
547
+ callback = info[3].As<Napi::Function>();
548
+ }
527
549
 
528
- database_ref->Schedule(info.Env(), duckdb::make_uniq<CreateArrowViewTask>(*this, list_value, name));
550
+ database_ref->Schedule(info.Env(), duckdb::make_uniq<CreateArrowViewTask>(*this, list_value, name, callback));
529
551
 
530
552
  return Value();
531
553
  }
@@ -551,7 +573,7 @@ Napi::Value Connection::UnRegisterBuffer(const Napi::CallbackInfo &info) {
551
573
  };
552
574
 
553
575
  database_ref->Schedule(info.Env(),
554
- duckdb::make_uniq<ExecTaskWithCallback>(*this, final_query, callback, cpp_callback));
576
+ duckdb::make_uniq<ExecTaskWithCallback>(*this, final_query, callback, cpp_callback));
555
577
 
556
578
  return Value();
557
579
  }
@@ -44,14 +44,7 @@ struct StringAggFunction {
44
44
  if (!state.dataptr) {
45
45
  finalize_data.ReturnNull();
46
46
  } else {
47
- target = StringVector::AddString(finalize_data.result, state.dataptr, state.size);
48
- }
49
- }
50
-
51
- template <class STATE>
52
- static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
53
- if (state.dataptr) {
54
- delete[] state.dataptr;
47
+ target = string_t(state.dataptr, state.size);
55
48
  }
56
49
  }
57
50
 
@@ -59,12 +52,12 @@ struct StringAggFunction {
59
52
  return true;
60
53
  }
61
54
 
62
- static inline void PerformOperation(StringAggState &state, const char *str, const char *sep, idx_t str_size,
63
- idx_t sep_size) {
55
+ static inline void PerformOperation(StringAggState &state, ArenaAllocator &allocator, const char *str,
56
+ const char *sep, idx_t str_size, idx_t sep_size) {
64
57
  if (!state.dataptr) {
65
58
  // first iteration: allocate space for the string and copy it into the state
66
59
  state.alloc_size = MaxValue<idx_t>(8, NextPowerOfTwo(str_size));
67
- state.dataptr = new char[state.alloc_size];
60
+ state.dataptr = char_ptr_cast(allocator.Allocate(state.alloc_size));
68
61
  state.size = str_size;
69
62
  memcpy(state.dataptr, str, str_size);
70
63
  } else {
@@ -72,13 +65,12 @@ struct StringAggFunction {
72
65
  idx_t required_size = state.size + str_size + sep_size;
73
66
  if (required_size > state.alloc_size) {
74
67
  // no space! allocate extra space
68
+ const auto old_size = state.alloc_size;
75
69
  while (state.alloc_size < required_size) {
76
70
  state.alloc_size *= 2;
77
71
  }
78
- auto new_data = new char[state.alloc_size];
79
- memcpy(new_data, state.dataptr, state.size);
80
- delete[] state.dataptr;
81
- state.dataptr = new_data;
72
+ state.dataptr =
73
+ char_ptr_cast(allocator.Reallocate(data_ptr_cast(state.dataptr), old_size, state.alloc_size));
82
74
  }
83
75
  // copy the separator
84
76
  memcpy(state.dataptr + state.size, sep, sep_size);
@@ -89,14 +81,15 @@ struct StringAggFunction {
89
81
  }
90
82
  }
91
83
 
92
- static inline void PerformOperation(StringAggState &state, string_t str, optional_ptr<FunctionData> data_p) {
84
+ static inline void PerformOperation(StringAggState &state, ArenaAllocator &allocator, string_t str,
85
+ optional_ptr<FunctionData> data_p) {
93
86
  auto &data = data_p->Cast<StringAggBindData>();
94
- PerformOperation(state, str.GetData(), data.sep.c_str(), str.GetSize(), data.sep.size());
87
+ PerformOperation(state, allocator, str.GetData(), data.sep.c_str(), str.GetSize(), data.sep.size());
95
88
  }
96
89
 
97
90
  template <class INPUT_TYPE, class STATE, class OP>
98
91
  static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &unary_input) {
99
- PerformOperation(state, input, unary_input.input.bind_data);
92
+ PerformOperation(state, unary_input.input.allocator, input, unary_input.input.bind_data);
100
93
  }
101
94
 
102
95
  template <class INPUT_TYPE, class STATE, class OP>
@@ -113,8 +106,8 @@ struct StringAggFunction {
113
106
  // source is not set: skip combining
114
107
  return;
115
108
  }
116
- PerformOperation(target, string_t(source.dataptr, UnsafeNumericCast<uint32_t>(source.size)),
117
- aggr_input_data.bind_data);
109
+ PerformOperation(target, aggr_input_data.allocator,
110
+ string_t(source.dataptr, UnsafeNumericCast<uint32_t>(source.size)), aggr_input_data.bind_data);
118
111
  }
119
112
  };
120
113
 
@@ -162,8 +155,7 @@ AggregateFunctionSet StringAggFun::GetFunctions() {
162
155
  AggregateFunction::UnaryScatterUpdate<StringAggState, string_t, StringAggFunction>,
163
156
  AggregateFunction::StateCombine<StringAggState, StringAggFunction>,
164
157
  AggregateFunction::StateFinalize<StringAggState, string_t, StringAggFunction>,
165
- AggregateFunction::UnaryUpdate<StringAggState, string_t, StringAggFunction>, StringAggBind,
166
- AggregateFunction::StateDestroy<StringAggState, StringAggFunction>);
158
+ AggregateFunction::UnaryUpdate<StringAggState, string_t, StringAggFunction>, StringAggBind);
167
159
  string_agg_param.serialize = StringAggSerialize;
168
160
  string_agg_param.deserialize = StringAggDeserialize;
169
161
  string_agg.AddFunction(string_agg_param);
@@ -116,7 +116,6 @@ static void ListFinalize(Vector &states_vector, AggregateInputData &aggr_input_d
116
116
 
117
117
  // first iterate over all entries and set up the list entries, and get the newly required total length
118
118
  for (idx_t i = 0; i < count; i++) {
119
-
120
119
  auto &state = *states[states_data.sel->get_index(i)];
121
120
  const auto rid = i + offset;
122
121
  result_data[rid].offset = total_len;
@@ -223,17 +223,6 @@ void ExecuteExpression(const idx_t elem_cnt, const LambdaFunctions::ColumnInfo &
223
223
  // ListLambdaBindData
224
224
  //===--------------------------------------------------------------------===//
225
225
 
226
- unique_ptr<FunctionData> ListLambdaBindData::Copy() const {
227
- auto lambda_expr_copy = lambda_expr ? lambda_expr->Copy() : nullptr;
228
- return make_uniq<ListLambdaBindData>(return_type, std::move(lambda_expr_copy), has_index);
229
- }
230
-
231
- bool ListLambdaBindData::Equals(const FunctionData &other_p) const {
232
- auto &other = other_p.Cast<ListLambdaBindData>();
233
- return Expression::Equals(lambda_expr, other.lambda_expr) && return_type == other.return_type &&
234
- has_index == other.has_index;
235
- }
236
-
237
226
  void ListLambdaBindData::Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
238
227
  const ScalarFunction &) {
239
228
  auto &bind_data = bind_data_p->Cast<ListLambdaBindData>();
@@ -15,7 +15,17 @@
15
15
 
16
16
  namespace duckdb {
17
17
 
18
- // FIXME: use a local state for each thread to increase performance?
18
+ struct ListAggregatesLocalState : public FunctionLocalState {
19
+ explicit ListAggregatesLocalState(Allocator &allocator) : arena_allocator(allocator) {
20
+ }
21
+
22
+ ArenaAllocator arena_allocator;
23
+ };
24
+
25
+ unique_ptr<FunctionLocalState> ListAggregatesInitLocalState(ExpressionState &state, const BoundFunctionExpression &expr,
26
+ FunctionData *bind_data) {
27
+ return make_uniq<ListAggregatesLocalState>(BufferAllocator::Get(state.GetContext()));
28
+ }
19
29
  // FIXME: benchmark the use of simple_update against using update (if applicable)
20
30
 
21
31
  static unique_ptr<FunctionData> ListAggregatesBindFailure(ScalarFunction &bound_function) {
@@ -207,7 +217,8 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
207
217
  auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
208
218
  auto &info = func_expr.bind_info->Cast<ListAggregatesBindData>();
209
219
  auto &aggr = info.aggr_expr->Cast<BoundAggregateExpression>();
210
- ArenaAllocator allocator(Allocator::DefaultAllocator());
220
+ auto &allocator = ExecuteFunctionState::GetFunctionState(state)->Cast<ListAggregatesLocalState>().arena_allocator;
221
+ allocator.Reset();
211
222
  AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
212
223
 
213
224
  D_ASSERT(aggr.function.update);
@@ -511,8 +522,9 @@ static unique_ptr<FunctionData> ListUniqueBind(ClientContext &context, ScalarFun
511
522
  }
512
523
 
513
524
  ScalarFunction ListAggregateFun::GetFunction() {
514
- auto result = ScalarFunction({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR}, LogicalType::ANY,
515
- ListAggregateFunction, ListAggregateBind);
525
+ auto result =
526
+ ScalarFunction({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR}, LogicalType::ANY,
527
+ ListAggregateFunction, ListAggregateBind, nullptr, nullptr, ListAggregatesInitLocalState);
516
528
  BaseScalarFunction::SetReturnsError(result);
517
529
  result.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
518
530
  result.varargs = LogicalType::ANY;
@@ -523,12 +535,12 @@ ScalarFunction ListAggregateFun::GetFunction() {
523
535
 
524
536
  ScalarFunction ListDistinctFun::GetFunction() {
525
537
  return ScalarFunction({LogicalType::LIST(LogicalType::ANY)}, LogicalType::LIST(LogicalType::ANY),
526
- ListDistinctFunction, ListDistinctBind);
538
+ ListDistinctFunction, ListDistinctBind, nullptr, nullptr, ListAggregatesInitLocalState);
527
539
  }
528
540
 
529
541
  ScalarFunction ListUniqueFun::GetFunction() {
530
542
  return ScalarFunction({LogicalType::LIST(LogicalType::ANY)}, LogicalType::UBIGINT, ListUniqueFunction,
531
- ListUniqueBind);
543
+ ListUniqueBind, nullptr, nullptr, ListAggregatesInitLocalState);
532
544
  }
533
545
 
534
546
  } // namespace duckdb
@@ -71,13 +71,20 @@ unique_ptr<FunctionData> ICUDateFunc::Bind(ClientContext &context, ScalarFunctio
71
71
  return make_uniq<BindData>(context);
72
72
  }
73
73
 
74
- void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
74
+ bool ICUDateFunc::TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
75
75
  auto tz = icu_66::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(icu::StringPiece(tz_id.GetString())));
76
76
  if (*tz == icu::TimeZone::getUnknown()) {
77
77
  delete tz;
78
- throw NotImplementedException("Unknown TimeZone '%s'", tz_id.GetString());
78
+ return false;
79
79
  }
80
80
  calendar->adoptTimeZone(tz);
81
+ return true;
82
+ }
83
+
84
+ void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
85
+ if (!TrySetTimeZone(calendar, tz_id)) {
86
+ throw NotImplementedException("Unknown TimeZone '%s'", tz_id.GetString());
87
+ }
81
88
  }
82
89
 
83
90
  timestamp_t ICUDateFunc::GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros) {
@@ -11,9 +11,7 @@
11
11
  #include "duckdb/execution/expression_executor.hpp"
12
12
  #include "duckdb/function/scalar/strftime_format.hpp"
13
13
  #include "duckdb/main/client_context.hpp"
14
- #include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
15
14
  #include "duckdb/planner/expression/bound_function_expression.hpp"
16
- #include "duckdb/function/function_binder.hpp"
17
15
  #include "duckdb/function/cast/default_casts.hpp"
18
16
  #include "duckdb/main/extension_util.hpp"
19
17
 
@@ -60,14 +58,7 @@ struct ICUStrptime : public ICUDateFunc {
60
58
  }
61
59
 
62
60
  static uint64_t ToMicros(icu::Calendar *calendar, const ParseResult &parsed, const StrpTimeFormat &format) {
63
- // Set TZ first, if any.
64
- // Note that empty TZ names are not allowed,
65
- // but unknown names will map to GMT.
66
- if (!parsed.tz.empty()) {
67
- SetTimeZone(calendar, parsed.tz);
68
- }
69
-
70
- // Now get the parts in the given time zone
61
+ // Get the parts in the current time zone
71
62
  uint64_t micros = parsed.GetMicros();
72
63
  calendar->set(UCAL_EXTENDED_YEAR, parsed.data[0]); // strptime doesn't understand eras
73
64
  calendar->set(UCAL_MONTH, parsed.data[1] - 1);
@@ -110,6 +101,11 @@ struct ICUStrptime : public ICUDateFunc {
110
101
  if (parsed.is_special) {
111
102
  return parsed.ToTimestamp();
112
103
  } else {
104
+ // Set TZ first, if any.
105
+ if (!parsed.tz.empty()) {
106
+ SetTimeZone(calendar, parsed.tz);
107
+ }
108
+
113
109
  return GetTime(calendar, ToMicros(calendar, parsed, format));
114
110
  }
115
111
  }
@@ -143,7 +139,7 @@ struct ICUStrptime : public ICUDateFunc {
143
139
  if (format.Parse(input, parsed)) {
144
140
  if (parsed.is_special) {
145
141
  return parsed.ToTimestamp();
146
- } else {
142
+ } else if (parsed.tz.empty() || TrySetTimeZone(calendar, parsed.tz)) {
147
143
  timestamp_t result;
148
144
  if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
149
145
  return result;
@@ -49,7 +49,9 @@ struct ICUDateFunc {
49
49
  static duckdb::unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
50
50
  vector<duckdb::unique_ptr<Expression>> &arguments);
51
51
 
52
- //! Sets the time zone for the calendar.
52
+ //! Tries to set the time zone for the calendar and returns false if it is not valid.
53
+ static bool TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
54
+ //! Sets the time zone for the calendar. Throws if it is not valid
53
55
  static void SetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
54
56
  //! Gets the timestamp from the calendar, throwing if it is not in range.
55
57
  static bool TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result);
@@ -90,22 +90,16 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
90
90
  optional_ptr<FileHandle> override_handle) {
91
91
  if (size != 0) {
92
92
  auto &handle = override_handle ? *override_handle.get() : *file_handle.get();
93
- if (can_seek) {
94
- handle.Read(pointer, size, position);
95
- } else if (sample_run) { // Cache the buffer
96
- handle.Read(pointer, size, position);
97
93
 
94
+ if (!cached_buffers.empty() || position < cached_size) {
95
+ ReadFromCache(pointer, size, position);
96
+ }
97
+
98
+ handle.Read(pointer, size, position);
99
+ if (file_handle->IsPipe()) { // Cache the buffer
98
100
  cached_buffers.emplace_back(allocator.Allocate(size));
99
101
  memcpy(cached_buffers.back().get(), pointer, size);
100
102
  cached_size += size;
101
- } else {
102
- if (!cached_buffers.empty() || position < cached_size) {
103
- ReadFromCache(pointer, size, position);
104
- }
105
-
106
- if (size != 0) {
107
- handle.Read(pointer, size, position);
108
- }
109
103
  }
110
104
  }
111
105
 
@@ -121,30 +115,23 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
121
115
 
122
116
  bool JSONFileHandle::Read(char *pointer, idx_t &read_size, idx_t requested_size, bool &file_done, bool sample_run) {
123
117
  D_ASSERT(requested_size != 0);
118
+ read_size = 0;
124
119
  if (last_read_requested) {
125
120
  return false;
126
121
  }
127
122
 
128
- if (can_seek) {
129
- read_size = ReadInternal(pointer, requested_size);
130
- read_position += read_size;
131
- } else if (sample_run) { // Cache the buffer
132
- read_size = ReadInternal(pointer, requested_size);
133
- if (read_size > 0) {
134
- cached_buffers.emplace_back(allocator.Allocate(read_size));
135
- memcpy(cached_buffers.back().get(), pointer, read_size);
136
- }
137
- cached_size += read_size;
138
- read_position += read_size;
139
- } else {
140
- read_size = 0;
141
- if (!cached_buffers.empty() || read_position < cached_size) {
142
- read_size += ReadFromCache(pointer, requested_size, read_position);
143
- }
144
- if (requested_size != 0) {
145
- read_size += ReadInternal(pointer, requested_size);
146
- }
123
+ if (!cached_buffers.empty() || read_position < cached_size) {
124
+ read_size += ReadFromCache(pointer, requested_size, read_position);
125
+ }
126
+
127
+ auto temp_read_size = ReadInternal(pointer, requested_size);
128
+ if (file_handle->IsPipe() && temp_read_size != 0) { // Cache the buffer
129
+ cached_buffers.emplace_back(allocator.Allocate(temp_read_size));
130
+ memcpy(cached_buffers.back().get(), pointer, temp_read_size);
147
131
  }
132
+ cached_size += temp_read_size;
133
+ read_position += temp_read_size;
134
+ read_size += temp_read_size;
148
135
 
149
136
  if (read_size == 0) {
150
137
  last_read_requested = true;
@@ -17,12 +17,17 @@
17
17
  namespace duckdb {
18
18
 
19
19
  static DefaultMacro json_macros[] = {
20
- {DEFAULT_SCHEMA, "json_group_array", {"x", nullptr}, {{nullptr, nullptr}}, "to_json(list(x))"},
20
+ {DEFAULT_SCHEMA,
21
+ "json_group_array",
22
+ {"x", nullptr},
23
+ {{nullptr, nullptr}},
24
+ "CAST('[' || string_agg(CASE WHEN x IS NULL THEN 'null'::JSON ELSE to_json(x) END, ',') || ']' AS JSON)"},
21
25
  {DEFAULT_SCHEMA,
22
26
  "json_group_object",
23
- {"name", "value", nullptr},
27
+ {"n", "v", nullptr},
24
28
  {{nullptr, nullptr}},
25
- "to_json(map(list(name), list(value)))"},
29
+ "CAST('{' || string_agg(to_json(n::VARCHAR) || ':' || CASE WHEN v IS NULL THEN 'null'::JSON ELSE to_json(v) END, "
30
+ "',') || '}' AS JSON)"},
26
31
  {DEFAULT_SCHEMA,
27
32
  "json_group_structure",
28
33
  {"x", nullptr},
@@ -319,7 +319,8 @@ void ColumnReader::PreparePageV2(PageHeader &page_hdr) {
319
319
 
320
320
  auto compressed_bytes = page_hdr.compressed_page_size - uncompressed_bytes;
321
321
 
322
- AllocateCompressed(compressed_bytes);
322
+ ResizeableBuffer compressed_buffer;
323
+ compressed_buffer.resize(GetAllocator(), compressed_bytes);
323
324
  reader.ReadData(*protocol, compressed_buffer.ptr, compressed_bytes);
324
325
 
325
326
  DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, compressed_bytes, block->ptr + uncompressed_bytes,
@@ -334,10 +335,6 @@ void ColumnReader::AllocateBlock(idx_t size) {
334
335
  }
335
336
  }
336
337
 
337
- void ColumnReader::AllocateCompressed(idx_t size) {
338
- compressed_buffer.resize(GetAllocator(), size);
339
- }
340
-
341
338
  void ColumnReader::PreparePage(PageHeader &page_hdr) {
342
339
  AllocateBlock(page_hdr.uncompressed_page_size + 1);
343
340
  if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) {
@@ -348,7 +345,8 @@ void ColumnReader::PreparePage(PageHeader &page_hdr) {
348
345
  return;
349
346
  }
350
347
 
351
- AllocateCompressed(page_hdr.compressed_page_size + 1);
348
+ ResizeableBuffer compressed_buffer;
349
+ compressed_buffer.resize(GetAllocator(), page_hdr.compressed_page_size + 1);
352
350
  reader.ReadData(*protocol, compressed_buffer.ptr, page_hdr.compressed_page_size);
353
351
 
354
352
  DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, page_hdr.compressed_page_size, block->ptr,