duckdb 0.7.2-dev1671.0 → 0.7.2-dev1734.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-datefunc.cpp +20 -8
- package/src/duckdb/extension/icu/icu-strptime.cpp +117 -29
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -0
- package/src/duckdb/src/common/local_file_system.cpp +13 -2
- package/src/duckdb/src/common/sort/partition_state.cpp +644 -0
- package/src/duckdb/src/execution/expression_executor.cpp +1 -1
- package/src/duckdb/src/execution/expression_executor_state.cpp +2 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +77 -849
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +247 -0
- package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -3
- package/src/duckdb/src/include/duckdb/planner/pragma_handler.hpp +3 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +1 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +77 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/temporary_file_information.hpp +12 -0
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +3 -59
- package/src/duckdb/src/main/extension/extension_install.cpp +11 -0
- package/src/duckdb/src/main/extension/extension_load.cpp +29 -3
- package/src/duckdb/src/main/query_profiler.cpp +1 -1
- package/src/duckdb/src/planner/pragma_handler.cpp +7 -5
- package/src/duckdb/src/storage/buffer/block_handle.cpp +128 -0
- package/src/duckdb/src/storage/buffer/block_manager.cpp +81 -0
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +132 -0
- package/src/duckdb/src/storage/buffer/buffer_pool_reservation.cpp +32 -0
- package/src/duckdb/src/storage/buffer_manager.cpp +0 -351
- package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +3 -5
- package/src/duckdb/ub_src_common_sort.cpp +2 -0
- package/src/duckdb/ub_src_storage_buffer.cpp +8 -0
package/package.json
CHANGED
@@ -65,24 +65,36 @@ timestamp_t ICUDateFunc::GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros)
|
|
65
65
|
return timestamp_t(millis * Interval::MICROS_PER_MSEC + micros);
|
66
66
|
}
|
67
67
|
|
68
|
-
|
68
|
+
bool ICUDateFunc::TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result) {
|
69
69
|
// Extract the new time
|
70
70
|
UErrorCode status = U_ZERO_ERROR;
|
71
71
|
auto millis = int64_t(calendar->getTime(status));
|
72
72
|
if (U_FAILURE(status)) {
|
73
|
-
|
73
|
+
return false;
|
74
74
|
}
|
75
75
|
|
76
76
|
// UDate is a double, so it can't overflow (it just loses accuracy), but converting back to µs can.
|
77
|
-
|
78
|
-
|
77
|
+
if (!TryMultiplyOperator::Operation<int64_t, int64_t, int64_t>(millis, Interval::MICROS_PER_MSEC, millis)) {
|
78
|
+
return false;
|
79
|
+
}
|
80
|
+
if (!TryAddOperator::Operation<int64_t, int64_t, int64_t>(millis, micros, millis)) {
|
81
|
+
return false;
|
82
|
+
}
|
79
83
|
|
80
84
|
// Now make sure the value is in range
|
81
|
-
|
82
|
-
|
83
|
-
|
85
|
+
result = timestamp_t(millis);
|
86
|
+
date_t out_date = Timestamp::GetDate(result);
|
87
|
+
int64_t days_micros;
|
88
|
+
return TryMultiplyOperator::Operation<int64_t, int64_t, int64_t>(out_date.days, Interval::MICROS_PER_DAY,
|
89
|
+
days_micros);
|
90
|
+
}
|
84
91
|
|
85
|
-
|
92
|
+
timestamp_t ICUDateFunc::GetTime(icu::Calendar *calendar, uint64_t micros) {
|
93
|
+
timestamp_t result;
|
94
|
+
if (!TryGetTime(calendar, micros, result)) {
|
95
|
+
throw ConversionException("Unable to convert ICU date to timestamp");
|
96
|
+
}
|
97
|
+
return result;
|
86
98
|
}
|
87
99
|
|
88
100
|
uint64_t ICUDateFunc::SetTime(icu::Calendar *calendar, timestamp_t date) {
|
@@ -19,17 +19,31 @@
|
|
19
19
|
namespace duckdb {
|
20
20
|
|
21
21
|
struct ICUStrptime : public ICUDateFunc {
|
22
|
+
using ParseResult = StrpTimeFormat::ParseResult;
|
23
|
+
|
22
24
|
struct ICUStrptimeBindData : public BindData {
|
23
|
-
ICUStrptimeBindData(ClientContext &context, const StrpTimeFormat &format)
|
25
|
+
ICUStrptimeBindData(ClientContext &context, const StrpTimeFormat &format)
|
26
|
+
: BindData(context), formats(1, format) {
|
27
|
+
}
|
28
|
+
ICUStrptimeBindData(ClientContext &context, vector<StrpTimeFormat> formats_p)
|
29
|
+
: BindData(context), formats(std::move(formats_p)) {
|
24
30
|
}
|
25
|
-
ICUStrptimeBindData(const ICUStrptimeBindData &other) : BindData(other),
|
31
|
+
ICUStrptimeBindData(const ICUStrptimeBindData &other) : BindData(other), formats(other.formats) {
|
26
32
|
}
|
27
33
|
|
28
|
-
StrpTimeFormat
|
34
|
+
vector<StrpTimeFormat> formats;
|
29
35
|
|
30
36
|
bool Equals(const FunctionData &other_p) const override {
|
31
37
|
auto &other = other_p.Cast<ICUStrptimeBindData>();
|
32
|
-
|
38
|
+
if (formats.size() != other.formats.size()) {
|
39
|
+
return false;
|
40
|
+
}
|
41
|
+
for (size_t i = 0; i < formats.size(); ++i) {
|
42
|
+
if (formats[i].format_specifier != other.formats[i].format_specifier) {
|
43
|
+
return false;
|
44
|
+
}
|
45
|
+
}
|
46
|
+
return true;
|
33
47
|
}
|
34
48
|
duckdb::unique_ptr<FunctionData> Copy() const override {
|
35
49
|
return make_uniq<ICUStrptimeBindData>(*this);
|
@@ -53,13 +67,7 @@ struct ICUStrptime : public ICUDateFunc {
|
|
53
67
|
}
|
54
68
|
}
|
55
69
|
|
56
|
-
static
|
57
|
-
StrpTimeFormat::ParseResult parsed;
|
58
|
-
format.Parse(input, parsed);
|
59
|
-
if (!parsed.error_message.empty()) {
|
60
|
-
throw InvalidInputException(parsed.FormatError(input, format.format_specifier));
|
61
|
-
}
|
62
|
-
|
70
|
+
static uint64_t ToMicros(icu::Calendar *calendar, const ParseResult &parsed, const StrpTimeFormat &format) {
|
63
71
|
// Set TZ first, if any.
|
64
72
|
// Note that empty TZ names are not allowed,
|
65
73
|
// but unknown names will map to GMT.
|
@@ -83,18 +91,19 @@ struct ICUStrptime : public ICUDateFunc {
|
|
83
91
|
calendar->set(UCAL_ZONE_OFFSET, parsed.data[7] * Interval::MSECS_PER_SEC * Interval::SECS_PER_MINUTE);
|
84
92
|
}
|
85
93
|
|
86
|
-
return
|
94
|
+
return micros;
|
87
95
|
}
|
88
96
|
|
89
|
-
static void
|
97
|
+
static void Parse(DataChunk &args, ExpressionState &state, Vector &result) {
|
90
98
|
D_ASSERT(args.ColumnCount() == 2);
|
91
99
|
auto &str_arg = args.data[0];
|
92
100
|
auto &fmt_arg = args.data[1];
|
93
101
|
|
94
102
|
auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
|
95
103
|
auto &info = func_expr.bind_info->Cast<ICUStrptimeBindData>();
|
96
|
-
CalendarPtr
|
97
|
-
auto
|
104
|
+
CalendarPtr calendar_ptr(info.calendar->clone());
|
105
|
+
auto calendar = calendar_ptr.get();
|
106
|
+
auto &formats = info.formats;
|
98
107
|
|
99
108
|
D_ASSERT(fmt_arg.GetVectorType() == VectorType::CONSTANT_VECTOR);
|
100
109
|
|
@@ -102,12 +111,55 @@ struct ICUStrptime : public ICUDateFunc {
|
|
102
111
|
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
103
112
|
ConstantVector::SetNull(result, true);
|
104
113
|
} else {
|
105
|
-
UnaryExecutor::Execute<string_t, timestamp_t>(
|
106
|
-
|
114
|
+
UnaryExecutor::Execute<string_t, timestamp_t>(str_arg, result, args.size(), [&](string_t input) {
|
115
|
+
ParseResult parsed;
|
116
|
+
for (auto &format : info.formats) {
|
117
|
+
if (format.Parse(input, parsed)) {
|
118
|
+
return GetTime(calendar, ToMicros(calendar, parsed, format));
|
119
|
+
}
|
120
|
+
}
|
121
|
+
|
122
|
+
throw InvalidInputException(parsed.FormatError(input, info.formats[0].format_specifier));
|
123
|
+
});
|
107
124
|
}
|
108
125
|
}
|
109
126
|
|
110
|
-
static
|
127
|
+
static void TryParse(DataChunk &args, ExpressionState &state, Vector &result) {
|
128
|
+
D_ASSERT(args.ColumnCount() == 2);
|
129
|
+
auto &str_arg = args.data[0];
|
130
|
+
auto &fmt_arg = args.data[1];
|
131
|
+
|
132
|
+
auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
|
133
|
+
auto &info = func_expr.bind_info->Cast<ICUStrptimeBindData>();
|
134
|
+
CalendarPtr calendar_ptr(info.calendar->clone());
|
135
|
+
auto calendar = calendar_ptr.get();
|
136
|
+
auto &formats = info.formats;
|
137
|
+
|
138
|
+
D_ASSERT(fmt_arg.GetVectorType() == VectorType::CONSTANT_VECTOR);
|
139
|
+
|
140
|
+
if (ConstantVector::IsNull(fmt_arg)) {
|
141
|
+
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
142
|
+
ConstantVector::SetNull(result, true);
|
143
|
+
} else {
|
144
|
+
UnaryExecutor::ExecuteWithNulls<string_t, timestamp_t>(
|
145
|
+
str_arg, result, args.size(), [&](string_t input, ValidityMask &mask, idx_t idx) {
|
146
|
+
ParseResult parsed;
|
147
|
+
for (auto &format : info.formats) {
|
148
|
+
if (format.Parse(input, parsed)) {
|
149
|
+
timestamp_t result;
|
150
|
+
if (TryGetTime(calendar, ToMicros(calendar, parsed, format), result)) {
|
151
|
+
return result;
|
152
|
+
}
|
153
|
+
}
|
154
|
+
}
|
155
|
+
|
156
|
+
mask.SetInvalid(idx);
|
157
|
+
return timestamp_t();
|
158
|
+
});
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
static bind_scalar_function_t bind_strptime;
|
111
163
|
|
112
164
|
static duckdb::unique_ptr<FunctionData> StrpTimeBindFunction(ClientContext &context, ScalarFunction &bound_function,
|
113
165
|
vector<duckdb::unique_ptr<Expression>> &arguments) {
|
@@ -117,10 +169,14 @@ struct ICUStrptime : public ICUDateFunc {
|
|
117
169
|
if (!arguments[1]->IsFoldable()) {
|
118
170
|
throw InvalidInputException("strptime format must be a constant");
|
119
171
|
}
|
120
|
-
|
172
|
+
scalar_function_t function = (bound_function.name == "try_strptime") ? TryParse : Parse;
|
173
|
+
Value format_value = ExpressionExecutor::EvaluateScalar(context, *arguments[1]);
|
174
|
+
string format_string;
|
121
175
|
StrpTimeFormat format;
|
122
|
-
if (
|
123
|
-
|
176
|
+
if (format_value.IsNull()) {
|
177
|
+
;
|
178
|
+
} else if (format_value.type().id() == LogicalTypeId::VARCHAR) {
|
179
|
+
format_string = format_value.ToString();
|
124
180
|
format.format_specifier = format_string;
|
125
181
|
string error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
|
126
182
|
if (!error.empty()) {
|
@@ -129,24 +185,46 @@ struct ICUStrptime : public ICUDateFunc {
|
|
129
185
|
|
130
186
|
// If we have a time zone, we should use ICU for parsing and return a TSTZ instead.
|
131
187
|
if (format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME)) {
|
132
|
-
bound_function.function =
|
188
|
+
bound_function.function = function;
|
133
189
|
bound_function.return_type = LogicalType::TIMESTAMP_TZ;
|
134
190
|
return make_uniq<ICUStrptimeBindData>(context, format);
|
135
191
|
}
|
192
|
+
} else if (format_value.type() == LogicalType::LIST(LogicalType::VARCHAR)) {
|
193
|
+
const auto &children = ListValue::GetChildren(format_value);
|
194
|
+
if (children.empty()) {
|
195
|
+
throw InvalidInputException("strptime format list must not be empty");
|
196
|
+
}
|
197
|
+
vector<StrpTimeFormat> formats;
|
198
|
+
bool has_tz = true;
|
199
|
+
for (const auto &child : children) {
|
200
|
+
format_string = child.ToString();
|
201
|
+
format.format_specifier = format_string;
|
202
|
+
string error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
|
203
|
+
if (!error.empty()) {
|
204
|
+
throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
|
205
|
+
}
|
206
|
+
// If any format has UTC offsets, then we have to produce TSTZ
|
207
|
+
has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME);
|
208
|
+
formats.emplace_back(format);
|
209
|
+
}
|
210
|
+
if (has_tz) {
|
211
|
+
bound_function.function = function;
|
212
|
+
bound_function.return_type = LogicalType::TIMESTAMP_TZ;
|
213
|
+
return make_uniq<ICUStrptimeBindData>(context, formats);
|
214
|
+
}
|
136
215
|
}
|
137
216
|
|
138
217
|
// Fall back to faster, non-TZ parsing
|
139
|
-
bound_function.bind =
|
140
|
-
return
|
218
|
+
bound_function.bind = bind_strptime;
|
219
|
+
return bind_strptime(context, bound_function, arguments);
|
141
220
|
}
|
142
221
|
|
143
|
-
static void
|
222
|
+
static void TailPatch(const string &name, ClientContext &context, const vector<LogicalType> &types) {
|
144
223
|
// Find the old function
|
145
224
|
auto &catalog = Catalog::GetSystemCatalog(context);
|
146
225
|
auto entry = catalog.GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, DEFAULT_SCHEMA, name);
|
147
226
|
D_ASSERT(entry && entry->type == CatalogType::SCALAR_FUNCTION_ENTRY);
|
148
227
|
auto &func = entry->Cast<ScalarFunctionCatalogEntry>();
|
149
|
-
vector<LogicalType> types {LogicalType::VARCHAR, LogicalType::VARCHAR};
|
150
228
|
string error;
|
151
229
|
|
152
230
|
FunctionBinder function_binder(context);
|
@@ -157,10 +235,18 @@ struct ICUStrptime : public ICUDateFunc {
|
|
157
235
|
|
158
236
|
// Tail patch the old binder
|
159
237
|
auto &bound_function = func.functions.GetFunctionReferenceByOffset(best_function);
|
160
|
-
|
238
|
+
bind_strptime = bound_function.bind;
|
161
239
|
bound_function.bind = StrpTimeBindFunction;
|
162
240
|
}
|
163
241
|
|
242
|
+
static void AddBinaryTimestampFunction(const string &name, ClientContext &context) {
|
243
|
+
vector<LogicalType> types {LogicalType::VARCHAR, LogicalType::VARCHAR};
|
244
|
+
TailPatch(name, context, types);
|
245
|
+
|
246
|
+
types[1] = LogicalType::LIST(LogicalType::VARCHAR);
|
247
|
+
TailPatch(name, context, types);
|
248
|
+
}
|
249
|
+
|
164
250
|
static bool CastFromVarchar(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
|
165
251
|
auto &cast_data = parameters.cast_data->Cast<CastData>();
|
166
252
|
auto info = (BindData *)cast_data.info.get();
|
@@ -233,7 +319,7 @@ struct ICUStrptime : public ICUDateFunc {
|
|
233
319
|
}
|
234
320
|
};
|
235
321
|
|
236
|
-
bind_scalar_function_t ICUStrptime::
|
322
|
+
bind_scalar_function_t ICUStrptime::bind_strptime = nullptr;
|
237
323
|
|
238
324
|
struct ICUStrftime : public ICUDateFunc {
|
239
325
|
static void ParseFormatSpecifier(string_t &format_str, StrfTimeFormat &format) {
|
@@ -284,7 +370,7 @@ struct ICUStrftime : public ICUDateFunc {
|
|
284
370
|
auto &fmt_arg = args.data[1];
|
285
371
|
|
286
372
|
auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
|
287
|
-
auto &info =
|
373
|
+
auto &info = func_expr.bind_info->Cast<BindData>();
|
288
374
|
CalendarPtr calendar(info.calendar->clone());
|
289
375
|
const auto tz_name = info.tz_setting.c_str();
|
290
376
|
|
@@ -420,6 +506,8 @@ struct ICUStrftime : public ICUDateFunc {
|
|
420
506
|
|
421
507
|
void RegisterICUStrptimeFunctions(ClientContext &context) {
|
422
508
|
ICUStrptime::AddBinaryTimestampFunction("strptime", context);
|
509
|
+
ICUStrptime::AddBinaryTimestampFunction("try_strptime", context);
|
510
|
+
|
423
511
|
ICUStrftime::AddBinaryTimestampFunction("strftime", context);
|
424
512
|
|
425
513
|
// Add string casts
|
@@ -49,6 +49,8 @@ struct ICUDateFunc {
|
|
49
49
|
//! Sets the time zone for the calendar.
|
50
50
|
static void SetTimeZone(icu::Calendar *calendar, const string_t &tz_id);
|
51
51
|
//! Gets the timestamp from the calendar, throwing if it is not in range.
|
52
|
+
static bool TryGetTime(icu::Calendar *calendar, uint64_t micros, timestamp_t &result);
|
53
|
+
//! Gets the timestamp from the calendar, throwing if it is not in range.
|
52
54
|
static timestamp_t GetTime(icu::Calendar *calendar, uint64_t micros = 0);
|
53
55
|
//! Gets the timestamp from the calendar, assuming it is in range.
|
54
56
|
static timestamp_t GetTimeUnsafe(icu::Calendar *calendar, uint64_t micros = 0);
|
@@ -977,6 +977,7 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
|
|
977
977
|
throw IOException("Cannot use multiple \'**\' in one path");
|
978
978
|
}
|
979
979
|
|
980
|
+
bool recursive_search = false;
|
980
981
|
for (idx_t i = absolute_path ? 1 : 0; i < splits.size(); i++) {
|
981
982
|
bool is_last_chunk = i + 1 == splits.size();
|
982
983
|
bool has_glob = HasGlob(splits[i]);
|
@@ -988,12 +989,22 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
|
|
988
989
|
if (previous_directories.empty()) {
|
989
990
|
result.push_back(splits[i]);
|
990
991
|
} else {
|
991
|
-
|
992
|
-
|
992
|
+
if (recursive_search && is_last_chunk) {
|
993
|
+
for (auto &prev_directory : previous_directories) {
|
994
|
+
const string filename = JoinPath(prev_directory, splits[i]);
|
995
|
+
if (FileExists(filename) || DirectoryExists(filename)) {
|
996
|
+
result.push_back(filename);
|
997
|
+
}
|
998
|
+
}
|
999
|
+
} else {
|
1000
|
+
for (auto &prev_directory : previous_directories) {
|
1001
|
+
result.push_back(JoinPath(prev_directory, splits[i]));
|
1002
|
+
}
|
993
1003
|
}
|
994
1004
|
}
|
995
1005
|
} else {
|
996
1006
|
if (IsCrawl(splits[i])) {
|
1007
|
+
recursive_search = true;
|
997
1008
|
if (!is_last_chunk) {
|
998
1009
|
result = previous_directories;
|
999
1010
|
}
|