duckdb 1.2.1-dev6.0 → 1.2.1-dev8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
- package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
- package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
- package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
- package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
- package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
- package/src/duckdb/extension/json/json_extension.cpp +8 -3
- package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
- package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
- package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
- package/src/duckdb/src/catalog/catalog.cpp +12 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
- package/src/duckdb/src/common/bind_helpers.cpp +3 -0
- package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +19 -6
- package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
- package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
- package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
- package/src/duckdb/src/include/duckdb.h +495 -480
- package/src/duckdb/src/main/attached_database.cpp +1 -1
- package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
- package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
- package/src/duckdb/src/main/config.cpp +7 -1
- package/src/duckdb/src/main/database.cpp +8 -8
- package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
- package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
- package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
- package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
- package/src/duckdb/src/parallel/executor_task.cpp +10 -6
- package/src/duckdb/src/parallel/task_executor.cpp +4 -1
- package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/storage_manager.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group.cpp +5 -6
- package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
- package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
- package/src/duckdb/ub_src_parallel.cpp +2 -0
package/package.json
CHANGED
@@ -44,14 +44,7 @@ struct StringAggFunction {
|
|
44
44
|
if (!state.dataptr) {
|
45
45
|
finalize_data.ReturnNull();
|
46
46
|
} else {
|
47
|
-
target =
|
48
|
-
}
|
49
|
-
}
|
50
|
-
|
51
|
-
template <class STATE>
|
52
|
-
static void Destroy(STATE &state, AggregateInputData &aggr_input_data) {
|
53
|
-
if (state.dataptr) {
|
54
|
-
delete[] state.dataptr;
|
47
|
+
target = string_t(state.dataptr, state.size);
|
55
48
|
}
|
56
49
|
}
|
57
50
|
|
@@ -59,12 +52,12 @@ struct StringAggFunction {
|
|
59
52
|
return true;
|
60
53
|
}
|
61
54
|
|
62
|
-
static inline void PerformOperation(StringAggState &state,
|
63
|
-
idx_t sep_size) {
|
55
|
+
static inline void PerformOperation(StringAggState &state, ArenaAllocator &allocator, const char *str,
|
56
|
+
const char *sep, idx_t str_size, idx_t sep_size) {
|
64
57
|
if (!state.dataptr) {
|
65
58
|
// first iteration: allocate space for the string and copy it into the state
|
66
59
|
state.alloc_size = MaxValue<idx_t>(8, NextPowerOfTwo(str_size));
|
67
|
-
state.dataptr =
|
60
|
+
state.dataptr = char_ptr_cast(allocator.Allocate(state.alloc_size));
|
68
61
|
state.size = str_size;
|
69
62
|
memcpy(state.dataptr, str, str_size);
|
70
63
|
} else {
|
@@ -72,13 +65,12 @@ struct StringAggFunction {
|
|
72
65
|
idx_t required_size = state.size + str_size + sep_size;
|
73
66
|
if (required_size > state.alloc_size) {
|
74
67
|
// no space! allocate extra space
|
68
|
+
const auto old_size = state.alloc_size;
|
75
69
|
while (state.alloc_size < required_size) {
|
76
70
|
state.alloc_size *= 2;
|
77
71
|
}
|
78
|
-
|
79
|
-
|
80
|
-
delete[] state.dataptr;
|
81
|
-
state.dataptr = new_data;
|
72
|
+
state.dataptr =
|
73
|
+
char_ptr_cast(allocator.Reallocate(data_ptr_cast(state.dataptr), old_size, state.alloc_size));
|
82
74
|
}
|
83
75
|
// copy the separator
|
84
76
|
memcpy(state.dataptr + state.size, sep, sep_size);
|
@@ -89,14 +81,15 @@ struct StringAggFunction {
|
|
89
81
|
}
|
90
82
|
}
|
91
83
|
|
92
|
-
static inline void PerformOperation(StringAggState &state, string_t str,
|
84
|
+
static inline void PerformOperation(StringAggState &state, ArenaAllocator &allocator, string_t str,
|
85
|
+
optional_ptr<FunctionData> data_p) {
|
93
86
|
auto &data = data_p->Cast<StringAggBindData>();
|
94
|
-
PerformOperation(state, str.GetData(), data.sep.c_str(), str.GetSize(), data.sep.size());
|
87
|
+
PerformOperation(state, allocator, str.GetData(), data.sep.c_str(), str.GetSize(), data.sep.size());
|
95
88
|
}
|
96
89
|
|
97
90
|
template <class INPUT_TYPE, class STATE, class OP>
|
98
91
|
static void Operation(STATE &state, const INPUT_TYPE &input, AggregateUnaryInput &unary_input) {
|
99
|
-
PerformOperation(state, input, unary_input.input.bind_data);
|
92
|
+
PerformOperation(state, unary_input.input.allocator, input, unary_input.input.bind_data);
|
100
93
|
}
|
101
94
|
|
102
95
|
template <class INPUT_TYPE, class STATE, class OP>
|
@@ -113,8 +106,8 @@ struct StringAggFunction {
|
|
113
106
|
// source is not set: skip combining
|
114
107
|
return;
|
115
108
|
}
|
116
|
-
PerformOperation(target,
|
117
|
-
aggr_input_data.bind_data);
|
109
|
+
PerformOperation(target, aggr_input_data.allocator,
|
110
|
+
string_t(source.dataptr, UnsafeNumericCast<uint32_t>(source.size)), aggr_input_data.bind_data);
|
118
111
|
}
|
119
112
|
};
|
120
113
|
|
@@ -162,8 +155,7 @@ AggregateFunctionSet StringAggFun::GetFunctions() {
|
|
162
155
|
AggregateFunction::UnaryScatterUpdate<StringAggState, string_t, StringAggFunction>,
|
163
156
|
AggregateFunction::StateCombine<StringAggState, StringAggFunction>,
|
164
157
|
AggregateFunction::StateFinalize<StringAggState, string_t, StringAggFunction>,
|
165
|
-
AggregateFunction::UnaryUpdate<StringAggState, string_t, StringAggFunction>, StringAggBind
|
166
|
-
AggregateFunction::StateDestroy<StringAggState, StringAggFunction>);
|
158
|
+
AggregateFunction::UnaryUpdate<StringAggState, string_t, StringAggFunction>, StringAggBind);
|
167
159
|
string_agg_param.serialize = StringAggSerialize;
|
168
160
|
string_agg_param.deserialize = StringAggDeserialize;
|
169
161
|
string_agg.AddFunction(string_agg_param);
|
@@ -116,7 +116,6 @@ static void ListFinalize(Vector &states_vector, AggregateInputData &aggr_input_d
|
|
116
116
|
|
117
117
|
// first iterate over all entries and set up the list entries, and get the newly required total length
|
118
118
|
for (idx_t i = 0; i < count; i++) {
|
119
|
-
|
120
119
|
auto &state = *states[states_data.sel->get_index(i)];
|
121
120
|
const auto rid = i + offset;
|
122
121
|
result_data[rid].offset = total_len;
|
@@ -223,17 +223,6 @@ void ExecuteExpression(const idx_t elem_cnt, const LambdaFunctions::ColumnInfo &
|
|
223
223
|
// ListLambdaBindData
|
224
224
|
//===--------------------------------------------------------------------===//
|
225
225
|
|
226
|
-
unique_ptr<FunctionData> ListLambdaBindData::Copy() const {
|
227
|
-
auto lambda_expr_copy = lambda_expr ? lambda_expr->Copy() : nullptr;
|
228
|
-
return make_uniq<ListLambdaBindData>(return_type, std::move(lambda_expr_copy), has_index);
|
229
|
-
}
|
230
|
-
|
231
|
-
bool ListLambdaBindData::Equals(const FunctionData &other_p) const {
|
232
|
-
auto &other = other_p.Cast<ListLambdaBindData>();
|
233
|
-
return Expression::Equals(lambda_expr, other.lambda_expr) && return_type == other.return_type &&
|
234
|
-
has_index == other.has_index;
|
235
|
-
}
|
236
|
-
|
237
226
|
void ListLambdaBindData::Serialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
|
238
227
|
const ScalarFunction &) {
|
239
228
|
auto &bind_data = bind_data_p->Cast<ListLambdaBindData>();
|
@@ -15,7 +15,17 @@
|
|
15
15
|
|
16
16
|
namespace duckdb {
|
17
17
|
|
18
|
-
|
18
|
+
struct ListAggregatesLocalState : public FunctionLocalState {
|
19
|
+
explicit ListAggregatesLocalState(Allocator &allocator) : arena_allocator(allocator) {
|
20
|
+
}
|
21
|
+
|
22
|
+
ArenaAllocator arena_allocator;
|
23
|
+
};
|
24
|
+
|
25
|
+
unique_ptr<FunctionLocalState> ListAggregatesInitLocalState(ExpressionState &state, const BoundFunctionExpression &expr,
|
26
|
+
FunctionData *bind_data) {
|
27
|
+
return make_uniq<ListAggregatesLocalState>(BufferAllocator::Get(state.GetContext()));
|
28
|
+
}
|
19
29
|
// FIXME: benchmark the use of simple_update against using update (if applicable)
|
20
30
|
|
21
31
|
static unique_ptr<FunctionData> ListAggregatesBindFailure(ScalarFunction &bound_function) {
|
@@ -207,7 +217,8 @@ static void ListAggregatesFunction(DataChunk &args, ExpressionState &state, Vect
|
|
207
217
|
auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
|
208
218
|
auto &info = func_expr.bind_info->Cast<ListAggregatesBindData>();
|
209
219
|
auto &aggr = info.aggr_expr->Cast<BoundAggregateExpression>();
|
210
|
-
|
220
|
+
auto &allocator = ExecuteFunctionState::GetFunctionState(state)->Cast<ListAggregatesLocalState>().arena_allocator;
|
221
|
+
allocator.Reset();
|
211
222
|
AggregateInputData aggr_input_data(aggr.bind_info.get(), allocator);
|
212
223
|
|
213
224
|
D_ASSERT(aggr.function.update);
|
@@ -511,8 +522,9 @@ static unique_ptr<FunctionData> ListUniqueBind(ClientContext &context, ScalarFun
|
|
511
522
|
}
|
512
523
|
|
513
524
|
ScalarFunction ListAggregateFun::GetFunction() {
|
514
|
-
auto result =
|
515
|
-
|
525
|
+
auto result =
|
526
|
+
ScalarFunction({LogicalType::LIST(LogicalType::ANY), LogicalType::VARCHAR}, LogicalType::ANY,
|
527
|
+
ListAggregateFunction, ListAggregateBind, nullptr, nullptr, ListAggregatesInitLocalState);
|
516
528
|
BaseScalarFunction::SetReturnsError(result);
|
517
529
|
result.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
|
518
530
|
result.varargs = LogicalType::ANY;
|
@@ -523,12 +535,12 @@ ScalarFunction ListAggregateFun::GetFunction() {
|
|
523
535
|
|
524
536
|
ScalarFunction ListDistinctFun::GetFunction() {
|
525
537
|
return ScalarFunction({LogicalType::LIST(LogicalType::ANY)}, LogicalType::LIST(LogicalType::ANY),
|
526
|
-
ListDistinctFunction, ListDistinctBind);
|
538
|
+
ListDistinctFunction, ListDistinctBind, nullptr, nullptr, ListAggregatesInitLocalState);
|
527
539
|
}
|
528
540
|
|
529
541
|
ScalarFunction ListUniqueFun::GetFunction() {
|
530
542
|
return ScalarFunction({LogicalType::LIST(LogicalType::ANY)}, LogicalType::UBIGINT, ListUniqueFunction,
|
531
|
-
ListUniqueBind);
|
543
|
+
ListUniqueBind, nullptr, nullptr, ListAggregatesInitLocalState);
|
532
544
|
}
|
533
545
|
|
534
546
|
} // namespace duckdb
|
@@ -71,13 +71,20 @@ unique_ptr<FunctionData> ICUDateFunc::Bind(ClientContext &context, ScalarFunctio
|
|
71
71
|
return make_uniq<BindData>(context);
|
72
72
|
}
|
73
73
|
|
74
|
-
|
74
|
+
bool ICUDateFunc::TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
|
75
75
|
auto tz = icu_66::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(icu::StringPiece(tz_id.GetString())));
|
76
76
|
if (*tz == icu::TimeZone::getUnknown()) {
|
77
77
|
delete tz;
|
78
|
-
|
78
|
+
return false;
|
79
79
|
}
|
80
80
|
calendar->adoptTimeZone(tz);
|
81
|
+
return true;
|
82
|
+
}
|
83
|
+
|
84
|
+
void ICUDateFunc::SetTimeZone(icu::Calendar *calendar, const string_t &tz_id) {
|
85
|
+
if (!TrySetTimeZone(calendar, tz_id)) {
|
86
|
+
throw NotImplementedException("Unknown TimeZone '%s'", tz_id.GetString());
|
87
|
+
}
|
81
88
|
}
|
82
89
|
|
83
90
|
timestamp_t ICUDateFunc::GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros) {
|
@@ -11,9 +11,7 @@
|
|
11
11
|
#include "duckdb/execution/expression_executor.hpp"
|
12
12
|
#include "duckdb/function/scalar/strftime_format.hpp"
|
13
13
|
#include "duckdb/main/client_context.hpp"
|
14
|
-
#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp"
|
15
14
|
#include "duckdb/planner/expression/bound_function_expression.hpp"
|
16
|
-
#include "duckdb/function/function_binder.hpp"
|
17
15
|
#include "duckdb/function/cast/default_casts.hpp"
|
18
16
|
#include "duckdb/main/extension_util.hpp"
|
19
17
|
|
@@ -60,14 +58,7 @@ struct ICUStrptime : public ICUDateFunc {
|
|
60
58
|
}
|
61
59
|
|
62
60
|
static uint64_t ToMicros(icu::Calendar *calendar, const ParseResult &parsed, const StrpTimeFormat &format) {
|
63
|
-
//
|
64
|
-
// Note that empty TZ names are not allowed,
|
65
|
-
// but unknown names will map to GMT.
|
66
|
-
if (!parsed.tz.empty()) {
|
67
|
-
SetTimeZone(calendar, parsed.tz);
|
68
|
-
}
|
69
|
-
|
70
|
-
// Now get the parts in the given time zone
|
61
|
+
// Get the parts in the current time zone
|
71
62
|
uint64_t micros = parsed.GetMicros();
|
72
63
|
calendar->set(UCAL_EXTENDED_YEAR, parsed.data[0]); // strptime doesn't understand eras
|
73
64
|
calendar->set(UCAL_MONTH, parsed.data[1] - 1);
|
@@ -110,6 +101,11 @@ struct ICUStrptime : public ICUDateFunc {
|
|
110
101
|
if (parsed.is_special) {
|
111
102
|
return parsed.ToTimestamp();
|
112
103
|
} else {
|
104
|
+
// Set TZ first, if any.
|
105
|
+
if (!parsed.tz.empty()) {
|
106
|
+
SetTimeZone(calendar, parsed.tz);
|
107
|
+
}
|
108
|
+
|
113
109
|
return GetTime(calendar, ToMicros(calendar, parsed, format));
|
114
110
|
}
|
115
111
|
}
|
@@ -143,7 +139,7 @@ struct ICUStrptime : public ICUDateFunc {
|
|
143
139
|
if (format.Parse(input, parsed)) {
|
144
140
|
if (parsed.is_special) {
|
145
141
|
return parsed.ToTimestamp();
|
146
|
-
} else {
|
142
|
+
} else if (parsed.tz.empty() || TrySetTimeZone(calendar, parsed.tz)) {
|
147
143
|
timestamp_t result;
|
148
144
|
if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
|
149
145
|
return result;
|
@@ -49,7 +49,9 @@ struct ICUDateFunc {
|
|
49
49
|
static duckdb::unique_ptr<FunctionData> Bind(ClientContext &context, ScalarFunction &bound_function,
|
50
50
|
vector<duckdb::unique_ptr<Expression>> &arguments);
|
51
51
|
|
52
|
-
//!
|
52
|
+
//! Tries to set the time zone for the calendar and returns false if it is not valid.
|
53
|
+
static bool TrySetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
|
54
|
+
//! Sets the time zone for the calendar. Throws if it is not valid
|
53
55
|
static void SetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
|
54
56
|
//! Gets the timestamp from the calendar, throwing if it is not in range.
|
55
57
|
static bool TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result);
|
@@ -90,22 +90,16 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
|
|
90
90
|
optional_ptr<FileHandle> override_handle) {
|
91
91
|
if (size != 0) {
|
92
92
|
auto &handle = override_handle ? *override_handle.get() : *file_handle.get();
|
93
|
-
if (can_seek) {
|
94
|
-
handle.Read(pointer, size, position);
|
95
|
-
} else if (sample_run) { // Cache the buffer
|
96
|
-
handle.Read(pointer, size, position);
|
97
93
|
|
94
|
+
if (!cached_buffers.empty() || position < cached_size) {
|
95
|
+
ReadFromCache(pointer, size, position);
|
96
|
+
}
|
97
|
+
|
98
|
+
handle.Read(pointer, size, position);
|
99
|
+
if (file_handle->IsPipe()) { // Cache the buffer
|
98
100
|
cached_buffers.emplace_back(allocator.Allocate(size));
|
99
101
|
memcpy(cached_buffers.back().get(), pointer, size);
|
100
102
|
cached_size += size;
|
101
|
-
} else {
|
102
|
-
if (!cached_buffers.empty() || position < cached_size) {
|
103
|
-
ReadFromCache(pointer, size, position);
|
104
|
-
}
|
105
|
-
|
106
|
-
if (size != 0) {
|
107
|
-
handle.Read(pointer, size, position);
|
108
|
-
}
|
109
103
|
}
|
110
104
|
}
|
111
105
|
|
@@ -121,30 +115,23 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
|
|
121
115
|
|
122
116
|
bool JSONFileHandle::Read(char *pointer, idx_t &read_size, idx_t requested_size, bool &file_done, bool sample_run) {
|
123
117
|
D_ASSERT(requested_size != 0);
|
118
|
+
read_size = 0;
|
124
119
|
if (last_read_requested) {
|
125
120
|
return false;
|
126
121
|
}
|
127
122
|
|
128
|
-
if (
|
129
|
-
read_size
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
}
|
137
|
-
cached_size += read_size;
|
138
|
-
read_position += read_size;
|
139
|
-
} else {
|
140
|
-
read_size = 0;
|
141
|
-
if (!cached_buffers.empty() || read_position < cached_size) {
|
142
|
-
read_size += ReadFromCache(pointer, requested_size, read_position);
|
143
|
-
}
|
144
|
-
if (requested_size != 0) {
|
145
|
-
read_size += ReadInternal(pointer, requested_size);
|
146
|
-
}
|
123
|
+
if (!cached_buffers.empty() || read_position < cached_size) {
|
124
|
+
read_size += ReadFromCache(pointer, requested_size, read_position);
|
125
|
+
}
|
126
|
+
|
127
|
+
auto temp_read_size = ReadInternal(pointer, requested_size);
|
128
|
+
if (file_handle->IsPipe() && temp_read_size != 0) { // Cache the buffer
|
129
|
+
cached_buffers.emplace_back(allocator.Allocate(temp_read_size));
|
130
|
+
memcpy(cached_buffers.back().get(), pointer, temp_read_size);
|
147
131
|
}
|
132
|
+
cached_size += temp_read_size;
|
133
|
+
read_position += temp_read_size;
|
134
|
+
read_size += temp_read_size;
|
148
135
|
|
149
136
|
if (read_size == 0) {
|
150
137
|
last_read_requested = true;
|
@@ -17,12 +17,17 @@
|
|
17
17
|
namespace duckdb {
|
18
18
|
|
19
19
|
static DefaultMacro json_macros[] = {
|
20
|
-
{DEFAULT_SCHEMA,
|
20
|
+
{DEFAULT_SCHEMA,
|
21
|
+
"json_group_array",
|
22
|
+
{"x", nullptr},
|
23
|
+
{{nullptr, nullptr}},
|
24
|
+
"CAST('[' || string_agg(CASE WHEN x IS NULL THEN 'null'::JSON ELSE to_json(x) END, ',') || ']' AS JSON)"},
|
21
25
|
{DEFAULT_SCHEMA,
|
22
26
|
"json_group_object",
|
23
|
-
{"
|
27
|
+
{"n", "v", nullptr},
|
24
28
|
{{nullptr, nullptr}},
|
25
|
-
"
|
29
|
+
"CAST('{' || string_agg(to_json(n::VARCHAR) || ':' || CASE WHEN v IS NULL THEN 'null'::JSON ELSE to_json(v) END, "
|
30
|
+
"',') || '}' AS JSON)"},
|
26
31
|
{DEFAULT_SCHEMA,
|
27
32
|
"json_group_structure",
|
28
33
|
{"x", nullptr},
|
@@ -319,7 +319,8 @@ void ColumnReader::PreparePageV2(PageHeader &page_hdr) {
|
|
319
319
|
|
320
320
|
auto compressed_bytes = page_hdr.compressed_page_size - uncompressed_bytes;
|
321
321
|
|
322
|
-
|
322
|
+
ResizeableBuffer compressed_buffer;
|
323
|
+
compressed_buffer.resize(GetAllocator(), compressed_bytes);
|
323
324
|
reader.ReadData(*protocol, compressed_buffer.ptr, compressed_bytes);
|
324
325
|
|
325
326
|
DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, compressed_bytes, block->ptr + uncompressed_bytes,
|
@@ -334,10 +335,6 @@ void ColumnReader::AllocateBlock(idx_t size) {
|
|
334
335
|
}
|
335
336
|
}
|
336
337
|
|
337
|
-
void ColumnReader::AllocateCompressed(idx_t size) {
|
338
|
-
compressed_buffer.resize(GetAllocator(), size);
|
339
|
-
}
|
340
|
-
|
341
338
|
void ColumnReader::PreparePage(PageHeader &page_hdr) {
|
342
339
|
AllocateBlock(page_hdr.uncompressed_page_size + 1);
|
343
340
|
if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) {
|
@@ -348,7 +345,8 @@ void ColumnReader::PreparePage(PageHeader &page_hdr) {
|
|
348
345
|
return;
|
349
346
|
}
|
350
347
|
|
351
|
-
|
348
|
+
ResizeableBuffer compressed_buffer;
|
349
|
+
compressed_buffer.resize(GetAllocator(), page_hdr.compressed_page_size + 1);
|
352
350
|
reader.ReadData(*protocol, compressed_buffer.ptr, page_hdr.compressed_page_size);
|
353
351
|
|
354
352
|
DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, page_hdr.compressed_page_size, block->ptr,
|
@@ -309,6 +309,7 @@ struct PageInformation {
|
|
309
309
|
idx_t offset = 0;
|
310
310
|
idx_t row_count = 0;
|
311
311
|
idx_t empty_count = 0;
|
312
|
+
idx_t null_count = 0;
|
312
313
|
idx_t estimated_page_size = 0;
|
313
314
|
};
|
314
315
|
|
@@ -388,7 +389,7 @@ protected:
|
|
388
389
|
virtual unique_ptr<ColumnWriterStatistics> InitializeStatsState();
|
389
390
|
|
390
391
|
//! Initialize the writer for a specific page. Only used for scalar types.
|
391
|
-
virtual unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state);
|
392
|
+
virtual unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state, idx_t page_idx);
|
392
393
|
|
393
394
|
//! Flushes the writer for a specific page. Only used for scalar types.
|
394
395
|
virtual void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state);
|
@@ -427,7 +428,8 @@ void BasicColumnWriter::RegisterToRowGroup(duckdb_parquet::RowGroup &row_group)
|
|
427
428
|
row_group.columns.push_back(std::move(column_chunk));
|
428
429
|
}
|
429
430
|
|
430
|
-
unique_ptr<ColumnWriterPageState> BasicColumnWriter::InitializePageState(BasicColumnWriterState &state
|
431
|
+
unique_ptr<ColumnWriterPageState> BasicColumnWriter::InitializePageState(BasicColumnWriterState &state,
|
432
|
+
idx_t page_idx) {
|
431
433
|
return nullptr;
|
432
434
|
}
|
433
435
|
|
@@ -463,6 +465,8 @@ void BasicColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *p
|
|
463
465
|
state.page_info.push_back(new_info);
|
464
466
|
page_info_ref = state.page_info.back();
|
465
467
|
}
|
468
|
+
} else {
|
469
|
+
page_info.null_count++;
|
466
470
|
}
|
467
471
|
vector_index++;
|
468
472
|
}
|
@@ -502,7 +506,7 @@ void BasicColumnWriter::BeginWrite(ColumnWriterState &state_p) {
|
|
502
506
|
MaxValue<idx_t>(NextPowerOfTwo(page_info.estimated_page_size), MemoryStream::DEFAULT_INITIAL_CAPACITY));
|
503
507
|
write_info.write_count = page_info.empty_count;
|
504
508
|
write_info.max_write_count = page_info.row_count;
|
505
|
-
write_info.page_state = InitializePageState(state);
|
509
|
+
write_info.page_state = InitializePageState(state, page_idx);
|
506
510
|
|
507
511
|
write_info.compressed_size = 0;
|
508
512
|
write_info.compressed_data = nullptr;
|
@@ -796,7 +800,6 @@ public:
|
|
796
800
|
};
|
797
801
|
|
798
802
|
struct BaseParquetOperator {
|
799
|
-
|
800
803
|
template <class SRC, class TGT>
|
801
804
|
static void WriteToStream(const TGT &input, WriteStream &ser) {
|
802
805
|
ser.WriteData(const_data_ptr_cast(&input), sizeof(TGT));
|
@@ -815,6 +818,11 @@ struct BaseParquetOperator {
|
|
815
818
|
template <class SRC, class TGT>
|
816
819
|
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
|
817
820
|
}
|
821
|
+
|
822
|
+
template <class SRC, class TGT>
|
823
|
+
static idx_t GetRowSize(const Vector &, idx_t) {
|
824
|
+
return sizeof(TGT);
|
825
|
+
}
|
818
826
|
};
|
819
827
|
|
820
828
|
struct ParquetCastOperator : public BaseParquetOperator {
|
@@ -936,6 +944,11 @@ struct ParquetStringOperator : public BaseParquetOperator {
|
|
936
944
|
static uint64_t XXHash64(const TGT &target_value) {
|
937
945
|
return duckdb_zstd::XXH64(target_value.GetData(), target_value.GetSize(), 0);
|
938
946
|
}
|
947
|
+
|
948
|
+
template <class SRC, class TGT>
|
949
|
+
static idx_t GetRowSize(const Vector &vector, idx_t index) {
|
950
|
+
return FlatVector::GetData<string_t>(vector)[index].GetSize();
|
951
|
+
}
|
939
952
|
};
|
940
953
|
|
941
954
|
struct ParquetIntervalTargetType {
|
@@ -1066,6 +1079,7 @@ public:
|
|
1066
1079
|
// analysis state for integer values for DELTA_BINARY_PACKED/DELTA_LENGTH_BYTE_ARRAY
|
1067
1080
|
idx_t total_value_count = 0;
|
1068
1081
|
idx_t total_string_size = 0;
|
1082
|
+
uint32_t key_bit_width = 0;
|
1069
1083
|
|
1070
1084
|
unordered_map<T, uint32_t> dictionary;
|
1071
1085
|
duckdb_parquet::Encoding::type encoding;
|
@@ -1222,11 +1236,12 @@ public:
|
|
1222
1236
|
return std::move(result);
|
1223
1237
|
}
|
1224
1238
|
|
1225
|
-
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p) override {
|
1239
|
+
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p, idx_t page_idx) override {
|
1226
1240
|
auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
|
1227
|
-
|
1228
|
-
auto result = make_uniq<StandardWriterPageState<SRC, TGT>>(
|
1229
|
-
|
1241
|
+
const auto &page_info = state_p.page_info[page_idx];
|
1242
|
+
auto result = make_uniq<StandardWriterPageState<SRC, TGT>>(
|
1243
|
+
page_info.row_count - (page_info.empty_count + page_info.null_count), state.total_string_size,
|
1244
|
+
state.encoding, state.dictionary);
|
1230
1245
|
return std::move(result);
|
1231
1246
|
}
|
1232
1247
|
|
@@ -1335,6 +1350,8 @@ public:
|
|
1335
1350
|
}
|
1336
1351
|
}
|
1337
1352
|
state.dictionary.clear();
|
1353
|
+
} else {
|
1354
|
+
state.key_bit_width = RleBpDecoder::ComputeBitWidth(state.dictionary.size());
|
1338
1355
|
}
|
1339
1356
|
}
|
1340
1357
|
|
@@ -1488,9 +1505,13 @@ public:
|
|
1488
1505
|
// bloom filter will be queued for writing in ParquetWriter::BufferBloomFilter one level up
|
1489
1506
|
}
|
1490
1507
|
|
1491
|
-
// TODO this now vastly over-estimates the page size
|
1492
1508
|
idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state_p) const override {
|
1493
|
-
|
1509
|
+
auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
|
1510
|
+
if (state.encoding == Encoding::RLE_DICTIONARY) {
|
1511
|
+
return (state.key_bit_width + 7) / 8;
|
1512
|
+
} else {
|
1513
|
+
return OP::template GetRowSize<SRC, TGT>(vector, index);
|
1514
|
+
}
|
1494
1515
|
}
|
1495
1516
|
};
|
1496
1517
|
|
@@ -1570,7 +1591,7 @@ public:
|
|
1570
1591
|
}
|
1571
1592
|
}
|
1572
1593
|
|
1573
|
-
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
|
1594
|
+
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state, idx_t page_idx) override {
|
1574
1595
|
return make_uniq<BooleanWriterPageState>();
|
1575
1596
|
}
|
1576
1597
|
|
@@ -1812,7 +1833,7 @@ public:
|
|
1812
1833
|
}
|
1813
1834
|
}
|
1814
1835
|
|
1815
|
-
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
|
1836
|
+
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state, idx_t page_idx) override {
|
1816
1837
|
return make_uniq<EnumWriterPageState>(bit_width);
|
1817
1838
|
}
|
1818
1839
|
|
@@ -160,7 +160,6 @@ protected:
|
|
160
160
|
|
161
161
|
private:
|
162
162
|
void AllocateBlock(idx_t size);
|
163
|
-
void AllocateCompressed(idx_t size);
|
164
163
|
void PrepareRead(parquet_filter_t &filter);
|
165
164
|
void PreparePage(PageHeader &page_hdr);
|
166
165
|
void PrepareDataPage(PageHeader &page_hdr);
|
@@ -178,7 +177,6 @@ private:
|
|
178
177
|
|
179
178
|
shared_ptr<ResizeableBuffer> block;
|
180
179
|
|
181
|
-
ResizeableBuffer compressed_buffer;
|
182
180
|
ResizeableBuffer offset_buffer;
|
183
181
|
|
184
182
|
unique_ptr<RleBpDecoder> dict_decoder;
|
@@ -33,9 +33,8 @@ public:
|
|
33
33
|
}
|
34
34
|
|
35
35
|
void FinishWrite(WriteStream &writer) {
|
36
|
-
D_ASSERT(stream->GetPosition() == total_string_size);
|
37
36
|
dbp_encoder.FinishWrite(writer);
|
38
|
-
writer.WriteData(buffer.get(),
|
37
|
+
writer.WriteData(buffer.get(), stream->GetPosition());
|
39
38
|
}
|
40
39
|
|
41
40
|
private:
|
@@ -769,6 +769,12 @@ CatalogEntryLookup Catalog::TryLookupEntry(CatalogEntryRetriever &retriever, Cat
|
|
769
769
|
|
770
770
|
if (if_not_found == OnEntryNotFound::RETURN_NULL) {
|
771
771
|
return {nullptr, nullptr, ErrorData()};
|
772
|
+
}
|
773
|
+
// Check if the default database is actually attached. CreateMissingEntryException will throw binder exception
|
774
|
+
// otherwise.
|
775
|
+
if (!GetCatalogEntry(context, GetDefaultCatalog(retriever))) {
|
776
|
+
auto except = CatalogException("%s with name %s does not exist!", CatalogTypeToString(type), name);
|
777
|
+
return {nullptr, nullptr, ErrorData(except)};
|
772
778
|
} else {
|
773
779
|
auto except = CreateMissingEntryException(retriever, name, type, schemas, error_context);
|
774
780
|
return {nullptr, nullptr, ErrorData(except)};
|
@@ -805,6 +811,12 @@ CatalogEntryLookup Catalog::TryLookupEntry(CatalogEntryRetriever &retriever, vec
|
|
805
811
|
|
806
812
|
if (if_not_found == OnEntryNotFound::RETURN_NULL) {
|
807
813
|
return {nullptr, nullptr, ErrorData()};
|
814
|
+
}
|
815
|
+
// Check if the default database is actually attached. CreateMissingEntryException will throw binder exception
|
816
|
+
// otherwise.
|
817
|
+
if (!GetCatalogEntry(context, GetDefaultCatalog(retriever))) {
|
818
|
+
auto except = CatalogException("%s with name %s does not exist!", CatalogTypeToString(type), name);
|
819
|
+
return {nullptr, nullptr, ErrorData(except)};
|
808
820
|
} else {
|
809
821
|
auto except = CreateMissingEntryException(retriever, name, type, schemas, error_context);
|
810
822
|
return {nullptr, nullptr, ErrorData(except)};
|
@@ -863,7 +863,7 @@ unique_ptr<CatalogEntry> DuckTableEntry::Copy(ClientContext &context) const {
|
|
863
863
|
}
|
864
864
|
|
865
865
|
auto binder = Binder::CreateBinder(context);
|
866
|
-
auto bound_create_info = binder->
|
866
|
+
auto bound_create_info = binder->BindCreateTableCheckpoint(std::move(create_info), schema);
|
867
867
|
return make_uniq<DuckTableEntry>(catalog, schema, *bound_create_info, storage);
|
868
868
|
}
|
869
869
|
|
@@ -76,7 +76,7 @@ void CatalogEntryRetriever::Inherit(const CatalogEntryRetriever &parent) {
|
|
76
76
|
this->search_path = parent.search_path;
|
77
77
|
}
|
78
78
|
|
79
|
-
CatalogSearchPath &CatalogEntryRetriever::GetSearchPath() {
|
79
|
+
const CatalogSearchPath &CatalogEntryRetriever::GetSearchPath() const {
|
80
80
|
if (search_path) {
|
81
81
|
return *search_path;
|
82
82
|
}
|