duckdb 1.2.1-dev4.0 → 1.2.1-dev8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/connection.cpp +57 -35
- package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
- package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
- package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
- package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
- package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
- package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
- package/src/duckdb/extension/json/json_extension.cpp +8 -3
- package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
- package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
- package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
- package/src/duckdb/src/catalog/catalog.cpp +12 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
- package/src/duckdb/src/common/bind_helpers.cpp +3 -0
- package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +19 -6
- package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
- package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
- package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
- package/src/duckdb/src/include/duckdb.h +495 -480
- package/src/duckdb/src/main/attached_database.cpp +1 -1
- package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
- package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
- package/src/duckdb/src/main/config.cpp +7 -1
- package/src/duckdb/src/main/database.cpp +8 -8
- package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
- package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
- package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
- package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
- package/src/duckdb/src/parallel/executor_task.cpp +10 -6
- package/src/duckdb/src/parallel/task_executor.cpp +4 -1
- package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/storage_manager.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group.cpp +5 -6
- package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
- package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
- package/src/duckdb/ub_src_parallel.cpp +2 -0
package/package.json
CHANGED
package/src/connection.cpp
CHANGED
@@ -17,12 +17,12 @@ Napi::FunctionReference Connection::Init(Napi::Env env, Napi::Object exports) {
|
|
17
17
|
Napi::HandleScope scope(env);
|
18
18
|
|
19
19
|
Napi::Function t = DefineClass(
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
20
|
+
env, "Connection",
|
21
|
+
{InstanceMethod("prepare", &Connection::Prepare), InstanceMethod("exec", &Connection::Exec),
|
22
|
+
InstanceMethod("register_udf_bulk", &Connection::RegisterUdf),
|
23
|
+
InstanceMethod("register_buffer", &Connection::RegisterBuffer),
|
24
|
+
InstanceMethod("unregister_udf", &Connection::UnregisterUdf), InstanceMethod("close", &Connection::Close),
|
25
|
+
InstanceMethod("unregister_buffer", &Connection::UnRegisterBuffer)});
|
26
26
|
|
27
27
|
exports.Set("Connection", t);
|
28
28
|
|
@@ -234,14 +234,14 @@ void DuckDBNodeUDFLauncher(Napi::Env env, Napi::Function jsudf, std::nullptr_t *
|
|
234
234
|
|
235
235
|
struct RegisterUdfTask : public Task {
|
236
236
|
RegisterUdfTask(Connection &connection, std::string name, std::string return_type_name, Napi::Function callback)
|
237
|
-
|
237
|
+
: Task(connection, callback), name(std::move(name)), return_type_name(std::move(return_type_name)) {
|
238
238
|
}
|
239
239
|
|
240
240
|
void DoWork() override {
|
241
241
|
auto &connection = Get<Connection>();
|
242
242
|
auto &udf_ptr = connection.udfs[name];
|
243
243
|
duckdb::scalar_function_t udf_function = [&udf_ptr](duckdb::DataChunk &args, duckdb::ExpressionState &state,
|
244
|
-
|
244
|
+
duckdb::Vector &result) -> void {
|
245
245
|
// here we can do only DuckDB stuff because we do not have a functioning env
|
246
246
|
|
247
247
|
// Flatten all args to simplify udfs
|
@@ -271,7 +271,7 @@ struct RegisterUdfTask : public Task {
|
|
271
271
|
auto return_type = cast.cast_type;
|
272
272
|
|
273
273
|
connection.connection->CreateVectorizedFunction(name, vector<duckdb::LogicalType> {}, return_type, udf_function,
|
274
|
-
|
274
|
+
duckdb::LogicalType::ANY);
|
275
275
|
}
|
276
276
|
std::string name;
|
277
277
|
std::string return_type_name;
|
@@ -296,7 +296,7 @@ Napi::Value Connection::RegisterUdf(const Napi::CallbackInfo &info) {
|
|
296
296
|
}
|
297
297
|
|
298
298
|
auto udf = duckdb_node_udf_function_t::New(env, udf_callback, "duckdb_node_udf" + name, 0, 1, nullptr,
|
299
|
-
|
299
|
+
[](Napi::Env, void *, std::nullptr_t *ctx) {});
|
300
300
|
|
301
301
|
// we have to unref the udf because otherwise there is a circular ref with the connection somehow(?)
|
302
302
|
// this took far too long to figure out
|
@@ -304,14 +304,14 @@ Napi::Value Connection::RegisterUdf(const Napi::CallbackInfo &info) {
|
|
304
304
|
udfs[name] = udf;
|
305
305
|
|
306
306
|
database_ref->Schedule(info.Env(),
|
307
|
-
|
307
|
+
duckdb::make_uniq<RegisterUdfTask>(*this, name, return_type_name, completion_callback));
|
308
308
|
|
309
309
|
return Value();
|
310
310
|
}
|
311
311
|
|
312
312
|
struct UnregisterUdfTask : public Task {
|
313
313
|
UnregisterUdfTask(Connection &connection, std::string name, Napi::Function callback)
|
314
|
-
|
314
|
+
: Task(connection, callback), name(std::move(name)) {
|
315
315
|
}
|
316
316
|
|
317
317
|
void DoWork() override {
|
@@ -354,7 +354,7 @@ Napi::Value Connection::UnregisterUdf(const Napi::CallbackInfo &info) {
|
|
354
354
|
|
355
355
|
struct ExecTask : public Task {
|
356
356
|
ExecTask(Connection &connection, std::string sql, Napi::Function callback)
|
357
|
-
|
357
|
+
: Task(connection, callback), sql(std::move(sql)) {
|
358
358
|
}
|
359
359
|
|
360
360
|
void DoWork() override {
|
@@ -395,8 +395,8 @@ struct ExecTask : public Task {
|
|
395
395
|
|
396
396
|
struct ExecTaskWithCallback : public ExecTask {
|
397
397
|
ExecTaskWithCallback(Connection &connection, std::string sql, Napi::Function js_callback,
|
398
|
-
|
399
|
-
|
398
|
+
std::function<void(void)> cpp_callback)
|
399
|
+
: ExecTask(connection, sql, js_callback), cpp_callback(cpp_callback) {
|
400
400
|
}
|
401
401
|
|
402
402
|
void Callback() override {
|
@@ -456,24 +456,41 @@ Napi::Value Connection::Exec(const Napi::CallbackInfo &info) {
|
|
456
456
|
}
|
457
457
|
|
458
458
|
struct CreateArrowViewTask : public Task {
|
459
|
-
CreateArrowViewTask(Connection &connection, duckdb::vector<duckdb::Value>& parameters, std::string &view_name)
|
460
|
-
|
459
|
+
CreateArrowViewTask(Connection &connection, duckdb::vector<duckdb::Value>& parameters, std::string &view_name, Napi::Function callback)
|
460
|
+
: Task(connection, callback), parameters(parameters), view_name(view_name) {
|
461
461
|
}
|
462
462
|
|
463
463
|
void DoWork() override {
|
464
464
|
auto &connection = Get<Connection>();
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
465
|
+
success = true;
|
466
|
+
try {
|
467
|
+
auto &con = *connection.connection;
|
468
|
+
// Now we create a table function relation
|
469
|
+
auto table_function_relation = duckdb::make_shared_ptr<duckdb::TableFunctionRelation>(con.context,"scan_arrow_ipc",parameters);
|
470
|
+
// Creates a relation for a temporary view that does replace
|
471
|
+
auto view_relation = table_function_relation->CreateView(view_name,true,true);
|
472
|
+
auto res = view_relation->Execute();
|
473
|
+
if (res->HasError()) {
|
474
|
+
success = false;
|
475
|
+
error = res->GetErrorObject();
|
476
|
+
}
|
477
|
+
} catch (duckdb::Exception &e) {
|
478
|
+
success = false;
|
479
|
+
error = duckdb::ErrorData(e);
|
480
|
+
return;
|
481
|
+
}
|
473
482
|
}
|
474
483
|
|
484
|
+
void Callback() override {
|
485
|
+
auto env = object.Env();
|
486
|
+
Napi::HandleScope scope(env);
|
487
|
+
callback.Value().MakeCallback(object.Value(), {success ? env.Null() : Utils::CreateError(env, error)});
|
488
|
+
};
|
489
|
+
|
475
490
|
duckdb::vector<duckdb::Value> parameters;
|
476
491
|
std::string view_name;
|
492
|
+
bool success;
|
493
|
+
duckdb::ErrorData error;
|
477
494
|
};
|
478
495
|
|
479
496
|
// Register Arrow IPC buffers for scanning from DuckDB
|
@@ -512,20 +529,25 @@ Napi::Value Connection::RegisterBuffer(const Napi::CallbackInfo &info) {
|
|
512
529
|
Napi::Uint8Array arr = v.As<Napi::Uint8Array>();
|
513
530
|
auto raw_ptr = reinterpret_cast<uint64_t>(arr.ArrayBuffer().Data());
|
514
531
|
auto length = (uint64_t)arr.ElementLength();
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
532
|
+
duckdb::child_list_t<duckdb::Value> buffer_values;
|
533
|
+
// This is a little bit evil, but allows us to support both libraries in between 1.2 and 1.3
|
534
|
+
if (db.ExtensionIsLoaded("nanoarrow")){
|
535
|
+
buffer_values.push_back({"ptr", duckdb::Value::POINTER(raw_ptr)});
|
536
|
+
} else {
|
537
|
+
buffer_values.push_back({"ptr", duckdb::Value::UBIGINT(raw_ptr)});
|
538
|
+
}
|
522
539
|
buffer_values.push_back({"size", duckdb::Value::UBIGINT(length)});
|
523
540
|
values.push_back(duckdb::Value::STRUCT(buffer_values));
|
524
541
|
}
|
525
542
|
duckdb::vector<duckdb::Value> list_value;
|
526
|
-
|
543
|
+
list_value.push_back(duckdb::Value::LIST(values));
|
544
|
+
|
545
|
+
Napi::Function callback;
|
546
|
+
if (info.Length() > 3 && info[3].IsFunction()) {
|
547
|
+
callback = info[3].As<Napi::Function>();
|
548
|
+
}
|
527
549
|
|
528
|
-
database_ref->Schedule(info.Env(), duckdb::make_uniq<CreateArrowViewTask>(*this, list_value, name));
|
550
|
+
database_ref->Schedule(info.Env(), duckdb::make_uniq<CreateArrowViewTask>(*this, list_value, name, callback));
|
529
551
|
|
530
552
|
return Value();
|
531
553
|
}
|
@@ -551,7 +573,7 @@ Napi::Value Connection::UnRegisterBuffer(const Napi::CallbackInfo &info) {
|
|
551
573
|
};
|
552
574
|
|
553
575
|
database_ref->Schedule(info.Env(),
|
554
|
-
|
576
|
+
duckdb::make_uniq<ExecTaskWithCallback>(*this, final_query, callback, cpp_callback));
|
555
577
|
|
556
578
|
return Value();
|
557
579
|
}
|
@@ -44,14 +44,7 @@ struct StringAggFunction {
|
|
44
44
|
if (!state.dataptr) {
|
45
45
|
finalize_data.ReturnNull();
|
46
46
|
} else {
|
47
|
-
target =
|
48
|
-
}
|
49
|
-
}
|
50
|
-
|
51
|
-
template <class STATE>
|
52
|
-
static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
|
53
|
-
if (state.dataptr) {
|
54
|
-
delete[] state.dataptr;
|
47
|
+
target = string_t(state.dataptr, state.size);
|
55
48
|
}
|
56
49
|
}
|
57
50
|
|
@@ -59,12 +52,12 @@ struct StringAggFunction {
|
|
59
52
|
return true;
|
60
53
|
}
|
61
54
|
|
62
|
-
static inline void PerformOperation(StringAggState &state,
|
63
|
-
idx_t sep_size) {
|
55
|
+
static inline void PerformOperation(StringAggState &state, ArenaAllocator &allocator, const char *str,
|
56
|
+
const char *sep, idx_t str_size, idx_t sep_size) {
|
64
57
|
if (!state.dataptr) {
|
65
58
|
// first iteration: allocate space for the string and copy it into the state
|
66
59
|
state.alloc_size = MaxValue<idx_t>(8, NextPowerOfTwo(str_size));
|
67
|
-
state.dataptr =
|
60
|
+
state.dataptr = char_ptr_cast(allocator.Allocate(state.alloc_size));
|
68
61
|
state.size = str_size;
|
69
62
|
memcpy(state.dataptr, str, str_size);
|
70
63
|
} else {
|
@@ -72,13 +65,12 @@ struct StringAggFunction {
|
|
72
65
|
idx_t required_size = state.size + str_size + sep_size;
|
73
66
|
if (required_size > state.alloc_size) {
|
74
67
|
// no space! allocate extra space
|
68
|
+
const auto old_size = state.alloc_size;
|
75
69
|
while (state.alloc_size < required_size) {
|
76
70
|
state.alloc_size *= 2;
|
77
71
|
}
|
78
|
-
|
79
|
-
|
80
|
-
delete[] state.dataptr;
|
81
|
-
state.dataptr = new_data;
|
72
|
+
state.dataptr =
|
73
|
+
char_ptr_cast(allocator.Reallocate(data_ptr_cast(state.dataptr), old_size, state.alloc_size));
|
82
74
|
}
|
83
75
|
// copy the separator
|
84
76
|
memcpy(state.dataptr + state.size, sep, sep_size);
|
@@ -89,14 +81,15 @@ struct StringAggFunction {
|
|
89
81
|
}
|
90
82
|
}
|
91
83
|
|
92
|
-
static inline void PerformOperation(StringAggState &state, string_t str,
|
84
|
+
static inline void PerformOperation(StringAggState &state, ArenaAllocator &allocator, string_t str,
|
85
|
+
optional_ptr<FunctionData> data_p) {
|
93
86
|
auto &data = data_p->Cast<StringAggBindData>();
|
94
|
-
PerformOperation(state, str.GetData(), data.sep.c_str(), str.GetSize(), data.sep.size());
|
87
|
+
PerformOperation(state, allocator, str.GetData(), data.sep.c_str(), str.GetSize(), data.sep.size());
|
95
88
|
}
|
96
89
|
|
97
90
|
template <class INPUT_TYPE, class STATE, class OP>
|
98
91
|
static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &unary_input) {
|
99
|
-
PerformOperation(state, input, unary_input.input.bind_data);
|
92
|
+
PerformOperation(state, unary_input.input.allocator, input, unary_input.input.bind_data);
|
100
93
|
}
|
101
94
|
|
102
95
|
template <class INPUT_TYPE, class STATE, class OP>
|
@@ -113,8 +106,8 @@ struct StringAggFunction {
|
|
113
106
|
// source is not set: skip combining
|
114
107
|
return;
|
115
108
|
}
|
116
|
-
PerformOperation(target,
|
117
|
-
aggr_input_data.bind_data);
|
109
|
+
PerformOperation(target, aggr_input_data.allocator,
|
110
|
+
string_t(source.dataptr, UnsafeNumericCast<uint32_t>(source.size)), aggr_input_data.bind_data);
|
118
111
|
}
|
119
112
|
};
|
120
113
|
|
@@ -162,8 +155,7 @@ AggregateFunctionSet StringAggFun::GetFunctions() {
|
|
162
155
|
AggregateFunction::UnaryScatterUpdate<StringAggState, string_t, StringAggFunction>,
|
163
156
|
AggregateFunction::StateCombine<StringAggState, StringAggFunction>,
|
164
157
|
AggregateFunction::StateFinalize<StringAggState, string_t, StringAggFunction>,
|
165
|
-
AggregateFunction::UnaryUpdate<StringAggState, string_t, StringAggFunction>, StringAggBind
|
166
|
-
AggregateFunction::StateDestroy<StringAggState, StringAggFunction>);
|
158
|
+
AggregateFunction::UnaryUpdate<StringAggState, string_t, StringAggFunction>, StringAggBind);
|
167
159
|
string_agg_param.serialize = StringAggSerialize;
|
168
160
|
string_agg_param.deserialize = StringAggDeserialize;
|
169
161
|
string_agg.AddFunction(string_agg_param);
|
@@ -116,7 +116,6 @@ static void ListFinalize(Vector &states_vector, AggregateInputData &aggr_input_d
|
|
116
116
|
|
117
117
|
// first iterate over all entries and set up the list entries, and get the newly required total length
|
118
118
|
for (idx_t i = 0; i < count; i++) {
|
119
|
-
|
120
119
|
auto &state = *states[states_data.sel->get_index(i)];
|
121
120
|
const auto rid = i + offset;
|
122
121
|
result_data[rid].offset = total_len;
|
@@ -223,17 +223,6 @@ void ExecuteExpression(const idx_t elem_cnt, const LambdaFunctions::ColumnInfo &
|
|
223
223
|
// ListLambdaBindData
|
224
224
|
//===--------------------------------------------------------------------===//
|
225
225
|
|
226
|
-
unique_ptr<FunctionData> ListLambdaBindData::Copy() const {
|
227
|
-
auto lambda_expr_copy = lambda_expr ? lambda_expr->Copy() : nullptr;
|
228
|
-
return make_uniq<ListLambdaBindData>(return_type, std::move(lambda_expr_copy), has_index);
|
229
|
-
}
|
230
|
-
|
231
|
-
bool ListLambdaBindData::Equals(const FunctionData &other_p) const {
|
232
|
-
auto &other = other_p.Cast<ListLambdaBindData>();
|
233
|
-
return Expression::Equals(lambda_expr, other.lambda_expr) && return_type == other.return_type &&
|
234
|
-
has_index == other.has_index;
|
235
|
-
}
|
236
|
-
|
237
226
|
void ListLambdaBindData::Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
|
238
227
|
const ScalarFunction &) {
|
239
228
|
auto &bind_data = bind_data_p->Cast<ListLambdaBindData>();
|
@@ -15,7 +15,17 @@
|
|
15
15
|
|
16
16
|
namespace duckdb {
|
17
17
|
|
18
|
-
|
18
|
+
struct ListAggregatesLocalState : public FunctionLocalState {
|
19
|
+
explicit ListAggregatesLocalState(Allocator &allocator) : arena_allocator(allocator) {
|
20
|
+
}
|
21
|
+
|
22
|
+
ArenaAllocator arena_allocator;
|
23
|
+
};
|
24
|
+
|
25
|
+
unique_ptr<FunctionLocalState> ListAggregatesInitLocalState(ExpressionState &state, const BoundFunctionExpression &expr,
|
26
|
+
FunctionData *bind_data) {
|
27
|
+
return make_uniq<ListAggregatesLocalState>(BufferAllocator::Get(state.GetContext()));
|
28
|
+
}
|
19
29
|
// FIXME: benchmark the use of simple_update against using update (if applicable)
|
20
30
|
|
21
31
|
static unique_ptr<FunctionData> ListAggregatesBindFailure(ScalarFunction &bound_function) {
|
@@ -207,7 +217,8 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
|
|
207
217
|
auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
|
208
218
|
auto &info = func_expr.bind_info->Cast<ListAggregatesBindData>();
|
209
219
|
auto &aggr = info.aggr_expr->Cast<BoundAggregateExpression>();
|
210
|
-
|
220
|
+
auto &allocator = ExecuteFunctionState::GetFunctionState(state)->Cast<ListAggregatesLocalState>().arena_allocator;
|
221
|
+
allocator.Reset();
|
211
222
|
AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
|
212
223
|
|
213
224
|
D_ASSERT(aggr.function.update);
|
@@ -511,8 +522,9 @@ static unique_ptr<FunctionData> ListUniqueBind(ClientContext &context, ScalarFun
|
|
511
522
|
}
|
512
523
|
|
513
524
|
ScalarFunction ListAggregateFun::GetFunction() {
|
514
|
-
auto result =
|
515
|
-
|
525
|
+
auto result =
|
526
|
+
ScalarFunction({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR}, LogicalType::ANY,
|
527
|
+
ListAggregateFunction, ListAggregateBind, nullptr, nullptr, ListAggregatesInitLocalState);
|
516
528
|
BaseScalarFunction::SetReturnsError(result);
|
517
529
|
result.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
|
518
530
|
result.varargs = LogicalType::ANY;
|
@@ -523,12 +535,12 @@ ScalarFunction ListAggregateFun::GetFunction() {
|
|
523
535
|
|
524
536
|
ScalarFunction ListDistinctFun::GetFunction() {
|
525
537
|
return ScalarFunction({LogicalType::LIST(LogicalType::ANY)}, LogicalType::LIST(LogicalType::ANY),
|
526
|
-
ListDistinctFunction, ListDistinctBind);
|
538
|
+
ListDistinctFunction, ListDistinctBind, nullptr, nullptr, ListAggregatesInitLocalState);
|
527
539
|
}
|
528
540
|
|
529
541
|
ScalarFunction ListUniqueFun::GetFunction() {
|
530
542
|
return ScalarFunction({LogicalType::LIST(LogicalType::ANY)}, LogicalType::UBIGINT, ListUniqueFunction,
|
531
|
-
ListUniqueBind);
|
543
|
+
ListUniqueBind, nullptr, nullptr, ListAggregatesInitLocalState);
|
532
544
|
}
|
533
545
|
|
534
546
|
} // namespace duckdb
|
@@ -71,13 +71,20 @@ unique_ptr<FunctionData> ICUDateFunc::Bind(ClientContext &context, ScalarFunctio
|
|
71
71
|
return make_uniq<BindData>(context);
|
72
72
|
}
|
73
73
|
|
74
|
-
|
74
|
+
bool ICUDateFunc::TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
|
75
75
|
auto tz = icu_66::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(icu::StringPiece(tz_id.GetString())));
|
76
76
|
if (*tz == icu::TimeZone::getUnknown()) {
|
77
77
|
delete tz;
|
78
|
-
|
78
|
+
return false;
|
79
79
|
}
|
80
80
|
calendar->adoptTimeZone(tz);
|
81
|
+
return true;
|
82
|
+
}
|
83
|
+
|
84
|
+
void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
|
85
|
+
if (!TrySetTimeZone(calendar, tz_id)) {
|
86
|
+
throw NotImplementedException("Unknown TimeZone '%s'", tz_id.GetString());
|
87
|
+
}
|
81
88
|
}
|
82
89
|
|
83
90
|
timestamp_t ICUDateFunc::GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros) {
|
@@ -11,9 +11,7 @@
|
|
11
11
|
#include "duckdb/execution/expression_executor.hpp"
|
12
12
|
#include "duckdb/function/scalar/strftime_format.hpp"
|
13
13
|
#include "duckdb/main/client_context.hpp"
|
14
|
-
#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
|
15
14
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
16
|
-
#include "duckdb/function/function_binder.hpp"
|
17
15
|
#include "duckdb/function/cast/default_casts.hpp"
|
18
16
|
#include "duckdb/main/extension_util.hpp"
|
19
17
|
|
@@ -60,14 +58,7 @@ struct ICUStrptime : public ICUDateFunc {
|
|
60
58
|
}
|
61
59
|
|
62
60
|
static uint64_t ToMicros(icu::Calendar *calendar, const ParseResult &parsed, const StrpTimeFormat &format) {
|
63
|
-
//
|
64
|
-
// Note that empty TZ names are not allowed,
|
65
|
-
// but unknown names will map to GMT.
|
66
|
-
if (!parsed.tz.empty()) {
|
67
|
-
SetTimeZone(calendar, parsed.tz);
|
68
|
-
}
|
69
|
-
|
70
|
-
// Now get the parts in the given time zone
|
61
|
+
// Get the parts in the current time zone
|
71
62
|
uint64_t micros = parsed.GetMicros();
|
72
63
|
calendar->set(UCAL_EXTENDED_YEAR, parsed.data[0]); // strptime doesn't understand eras
|
73
64
|
calendar->set(UCAL_MONTH, parsed.data[1] - 1);
|
@@ -110,6 +101,11 @@ struct ICUStrptime : public ICUDateFunc {
|
|
110
101
|
if (parsed.is_special) {
|
111
102
|
return parsed.ToTimestamp();
|
112
103
|
} else {
|
104
|
+
// Set TZ first, if any.
|
105
|
+
if (!parsed.tz.empty()) {
|
106
|
+
SetTimeZone(calendar, parsed.tz);
|
107
|
+
}
|
108
|
+
|
113
109
|
return GetTime(calendar, ToMicros(calendar, parsed, format));
|
114
110
|
}
|
115
111
|
}
|
@@ -143,7 +139,7 @@ struct ICUStrptime : public ICUDateFunc {
|
|
143
139
|
if (format.Parse(input, parsed)) {
|
144
140
|
if (parsed.is_special) {
|
145
141
|
return parsed.ToTimestamp();
|
146
|
-
} else {
|
142
|
+
} else if (parsed.tz.empty() || TrySetTimeZone(calendar, parsed.tz)) {
|
147
143
|
timestamp_t result;
|
148
144
|
if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
|
149
145
|
return result;
|
@@ -49,7 +49,9 @@ struct ICUDateFunc {
|
|
49
49
|
static duckdb::unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
|
50
50
|
vector<duckdb::unique_ptr<Expression>> &arguments);
|
51
51
|
|
52
|
-
//!
|
52
|
+
//! Tries to set the time zone for the calendar and returns false if it is not valid.
|
53
|
+
static bool TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
|
54
|
+
//! Sets the time zone for the calendar. Throws if it is not valid
|
53
55
|
static void SetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
|
54
56
|
//! Gets the timestamp from the calendar, throwing if it is not in range.
|
55
57
|
static bool TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result);
|
@@ -90,22 +90,16 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
|
|
90
90
|
optional_ptr<FileHandle> override_handle) {
|
91
91
|
if (size != 0) {
|
92
92
|
auto &handle = override_handle ? *override_handle.get() : *file_handle.get();
|
93
|
-
if (can_seek) {
|
94
|
-
handle.Read(pointer, size, position);
|
95
|
-
} else if (sample_run) { // Cache the buffer
|
96
|
-
handle.Read(pointer, size, position);
|
97
93
|
|
94
|
+
if (!cached_buffers.empty() || position < cached_size) {
|
95
|
+
ReadFromCache(pointer, size, position);
|
96
|
+
}
|
97
|
+
|
98
|
+
handle.Read(pointer, size, position);
|
99
|
+
if (file_handle->IsPipe()) { // Cache the buffer
|
98
100
|
cached_buffers.emplace_back(allocator.Allocate(size));
|
99
101
|
memcpy(cached_buffers.back().get(), pointer, size);
|
100
102
|
cached_size += size;
|
101
|
-
} else {
|
102
|
-
if (!cached_buffers.empty() || position < cached_size) {
|
103
|
-
ReadFromCache(pointer, size, position);
|
104
|
-
}
|
105
|
-
|
106
|
-
if (size != 0) {
|
107
|
-
handle.Read(pointer, size, position);
|
108
|
-
}
|
109
103
|
}
|
110
104
|
}
|
111
105
|
|
@@ -121,30 +115,23 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
|
|
121
115
|
|
122
116
|
bool JSONFileHandle::Read(char *pointer, idx_t &read_size, idx_t requested_size, bool &file_done, bool sample_run) {
|
123
117
|
D_ASSERT(requested_size != 0);
|
118
|
+
read_size = 0;
|
124
119
|
if (last_read_requested) {
|
125
120
|
return false;
|
126
121
|
}
|
127
122
|
|
128
|
-
if (
|
129
|
-
read_size
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
}
|
137
|
-
cached_size += read_size;
|
138
|
-
read_position += read_size;
|
139
|
-
} else {
|
140
|
-
read_size = 0;
|
141
|
-
if (!cached_buffers.empty() || read_position < cached_size) {
|
142
|
-
read_size += ReadFromCache(pointer, requested_size, read_position);
|
143
|
-
}
|
144
|
-
if (requested_size != 0) {
|
145
|
-
read_size += ReadInternal(pointer, requested_size);
|
146
|
-
}
|
123
|
+
if (!cached_buffers.empty() || read_position < cached_size) {
|
124
|
+
read_size += ReadFromCache(pointer, requested_size, read_position);
|
125
|
+
}
|
126
|
+
|
127
|
+
auto temp_read_size = ReadInternal(pointer, requested_size);
|
128
|
+
if (file_handle->IsPipe() && temp_read_size != 0) { // Cache the buffer
|
129
|
+
cached_buffers.emplace_back(allocator.Allocate(temp_read_size));
|
130
|
+
memcpy(cached_buffers.back().get(), pointer, temp_read_size);
|
147
131
|
}
|
132
|
+
cached_size += temp_read_size;
|
133
|
+
read_position += temp_read_size;
|
134
|
+
read_size += temp_read_size;
|
148
135
|
|
149
136
|
if (read_size == 0) {
|
150
137
|
last_read_requested = true;
|
@@ -17,12 +17,17 @@
|
|
17
17
|
namespace duckdb {
|
18
18
|
|
19
19
|
static DefaultMacro json_macros[] = {
|
20
|
-
{DEFAULT_SCHEMA,
|
20
|
+
{DEFAULT_SCHEMA,
|
21
|
+
"json_group_array",
|
22
|
+
{"x", nullptr},
|
23
|
+
{{nullptr, nullptr}},
|
24
|
+
"CAST('[' || string_agg(CASE WHEN x IS NULL THEN 'null'::JSON ELSE to_json(x) END, ',') || ']' AS JSON)"},
|
21
25
|
{DEFAULT_SCHEMA,
|
22
26
|
"json_group_object",
|
23
|
-
{"
|
27
|
+
{"n", "v", nullptr},
|
24
28
|
{{nullptr, nullptr}},
|
25
|
-
"
|
29
|
+
"CAST('{' || string_agg(to_json(n::VARCHAR) || ':' || CASE WHEN v IS NULL THEN 'null'::JSON ELSE to_json(v) END, "
|
30
|
+
"',') || '}' AS JSON)"},
|
26
31
|
{DEFAULT_SCHEMA,
|
27
32
|
"json_group_structure",
|
28
33
|
{"x", nullptr},
|
@@ -319,7 +319,8 @@ void ColumnReader::PreparePageV2(PageHeader &page_hdr) {
|
|
319
319
|
|
320
320
|
auto compressed_bytes = page_hdr.compressed_page_size - uncompressed_bytes;
|
321
321
|
|
322
|
-
|
322
|
+
ResizeableBuffer compressed_buffer;
|
323
|
+
compressed_buffer.resize(GetAllocator(), compressed_bytes);
|
323
324
|
reader.ReadData(*protocol, compressed_buffer.ptr, compressed_bytes);
|
324
325
|
|
325
326
|
DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, compressed_bytes, block->ptr + uncompressed_bytes,
|
@@ -334,10 +335,6 @@ void ColumnReader::AllocateBlock(idx_t size) {
|
|
334
335
|
}
|
335
336
|
}
|
336
337
|
|
337
|
-
void ColumnReader::AllocateCompressed(idx_t size) {
|
338
|
-
compressed_buffer.resize(GetAllocator(), size);
|
339
|
-
}
|
340
|
-
|
341
338
|
void ColumnReader::PreparePage(PageHeader &page_hdr) {
|
342
339
|
AllocateBlock(page_hdr.uncompressed_page_size + 1);
|
343
340
|
if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) {
|
@@ -348,7 +345,8 @@ void ColumnReader::PreparePage(PageHeader &page_hdr) {
|
|
348
345
|
return;
|
349
346
|
}
|
350
347
|
|
351
|
-
|
348
|
+
ResizeableBuffer compressed_buffer;
|
349
|
+
compressed_buffer.resize(GetAllocator(), page_hdr.compressed_page_size + 1);
|
352
350
|
reader.ReadData(*protocol, compressed_buffer.ptr, page_hdr.compressed_page_size);
|
353
351
|
|
354
352
|
DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, page_hdr.compressed_page_size, block->ptr,
|