duckdb 0.7.2-dev2320.0 → 0.7.2-dev2410.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/data_chunk.cpp +1 -1
- package/src/duckdb/extension/icu/icu-extension.cpp +2 -2
- package/src/duckdb/extension/icu/icu-makedate.cpp +52 -0
- package/src/duckdb/extension/icu/icu-strptime.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +4 -0
- package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.cpp +28 -28
- package/src/duckdb/extension/icu/third_party/icu/i18n/dangical.h +4 -4
- package/src/duckdb/extension/json/include/json_common.hpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +1 -1
- package/src/duckdb/extension/json/json_functions.cpp +2 -2
- package/src/duckdb/extension/json/json_serializer.cpp +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/column_writer.cpp +3 -3
- package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +2 -2
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +2 -2
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/file_buffer.cpp +8 -0
- package/src/duckdb/src/common/operator/cast_operators.cpp +24 -25
- package/src/duckdb/src/common/radix_partitioning.cpp +34 -0
- package/src/duckdb/src/common/row_operations/row_heap_scatter.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +44 -124
- package/src/duckdb/src/common/sort/sorted_block.cpp +1 -1
- package/src/duckdb/src/common/types/bit.cpp +18 -18
- package/src/duckdb/src/common/types/blob.cpp +7 -7
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +1 -1
- package/src/duckdb/src/common/types/hash.cpp +1 -1
- package/src/duckdb/src/common/types/hyperloglog.cpp +1 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +2 -2
- package/src/duckdb/src/common/types/string_heap.cpp +2 -2
- package/src/duckdb/src/common/types/string_type.cpp +2 -2
- package/src/duckdb/src/common/types/timestamp.cpp +1 -1
- package/src/duckdb/src/common/types/vector.cpp +7 -7
- package/src/duckdb/src/execution/index/art/art_key.cpp +2 -2
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +144 -31
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +698 -0
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_asof_join.cpp +7 -1
- package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/bitagg.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/bitstring_agg.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/first.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/kurtosis.cpp +3 -2
- package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +2 -2
- package/src/duckdb/src/function/aggregate/distributive/skew.cpp +5 -1
- package/src/duckdb/src/function/aggregate/distributive/string_agg.cpp +1 -1
- package/src/duckdb/src/function/cast/list_casts.cpp +1 -1
- package/src/duckdb/src/function/cast/struct_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +3 -3
- package/src/duckdb/src/function/scalar/bit/bitstring.cpp +1 -1
- package/src/duckdb/src/function/scalar/blob/encode.cpp +1 -1
- package/src/duckdb/src/function/scalar/date/strftime.cpp +3 -3
- package/src/duckdb/src/function/scalar/generic/current_setting.cpp +1 -1
- package/src/duckdb/src/function/scalar/list/list_sort.cpp +30 -56
- package/src/duckdb/src/function/scalar/string/ascii.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/concat.cpp +6 -6
- package/src/duckdb/src/function/scalar/string/contains.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/damerau_levenshtein.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/hex.cpp +4 -4
- package/src/duckdb/src/function/scalar/string/instr.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/jaccard.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/jaro_winkler.cpp +5 -5
- package/src/duckdb/src/function/scalar/string/length.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/levenshtein.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/like.cpp +10 -11
- package/src/duckdb/src/function/scalar/string/mismatches.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/pad.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/prefix.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/printf.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +4 -4
- package/src/duckdb/src/function/scalar/string/repeat.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/replace.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/reverse.cpp +1 -1
- package/src/duckdb/src/function/scalar/string/starts_with.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/string_split.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/strip_accents.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/substring.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/suffix.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/translate.cpp +3 -3
- package/src/duckdb/src/function/scalar/string/trim.cpp +3 -3
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +1 -1
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -7
- package/src/duckdb/src/function/scalar/union/union_extract.cpp +1 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +2 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/crypto/md5.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/debug_initialize.hpp +17 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_type.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/radix.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +11 -60
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +8 -6
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/outer_join_marker.hpp +6 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_asof_join.hpp +93 -0
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/macro_function.hpp +17 -0
- package/src/duckdb/src/include/duckdb/function/scalar/regexp.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/capi/cast/utils.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +13 -3
- package/src/duckdb/src/include/duckdb/optimizer/cse_optimizer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/aggregate_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/alter_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/expression_binder/check_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/constant_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/group_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/insert_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/lateral_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/qualify_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder/relation_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/returning_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/update_binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/expression_binder/where_binder.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +12 -9
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/in_memory_block_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/partial_block_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/single_file_block_manager.hpp +11 -5
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
- package/src/duckdb/src/main/capi/cast/from_decimal-c.cpp +1 -1
- package/src/duckdb/src/main/capi/result-c.cpp +2 -2
- package/src/duckdb/src/main/config.cpp +26 -0
- package/src/duckdb/src/main/settings/settings.cpp +31 -8
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +9 -8
- package/src/duckdb/src/parser/expression/subquery_expression.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +33 -29
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +8 -10
- package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_collate_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +8 -7
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +6 -6
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +2 -2
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +6 -14
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +2 -5
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +8 -8
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +5 -5
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -2
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +2 -2
- package/src/duckdb/src/planner/binder/tableref/plan_expressionlistref.cpp +1 -1
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +4 -4
- package/src/duckdb/src/planner/expression.cpp +2 -1
- package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/alter_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/check_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/column_alias_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/insert_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/qualify_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/returning_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/update_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/where_binder.cpp +4 -4
- package/src/duckdb/src/planner/expression_binder.cpp +12 -12
- package/src/duckdb/src/storage/buffer/block_manager.cpp +1 -2
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +2 -2
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +1 -1
- package/src/duckdb/src/storage/compression/fsst.cpp +3 -3
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/meta_block_writer.cpp +4 -0
- package/src/duckdb/src/storage/partial_block_manager.cpp +11 -4
- package/src/duckdb/src/storage/single_file_block_manager.cpp +16 -9
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +5 -2
- package/src/duckdb/src/storage/statistics/string_stats.cpp +2 -2
- package/src/duckdb/src/storage/storage_manager.cpp +7 -2
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +21 -1
- package/src/duckdb/ub_src_execution_operator_join.cpp +2 -0
- package/src/statement.cpp +3 -3
@@ -78,7 +78,7 @@ struct LikeMatcher : public FunctionData {
|
|
78
78
|
}
|
79
79
|
|
80
80
|
bool Match(string_t &str) {
|
81
|
-
auto str_data = (const unsigned char *)str.
|
81
|
+
auto str_data = (const unsigned char *)str.GetData();
|
82
82
|
auto str_len = str.GetSize();
|
83
83
|
idx_t segment_idx = 0;
|
84
84
|
idx_t end_idx = segments.size() - 1;
|
@@ -213,11 +213,11 @@ bool LikeOperatorFunction(const char *s, idx_t slen, const char *pattern, idx_t
|
|
213
213
|
}
|
214
214
|
|
215
215
|
bool LikeOperatorFunction(string_t &s, string_t &pat) {
|
216
|
-
return LikeOperatorFunction(s.
|
216
|
+
return LikeOperatorFunction(s.GetData(), s.GetSize(), pat.GetData(), pat.GetSize());
|
217
217
|
}
|
218
218
|
|
219
219
|
bool LikeOperatorFunction(string_t &s, string_t &pat, char escape) {
|
220
|
-
return LikeOperatorFunction(s.
|
220
|
+
return LikeOperatorFunction(s.GetData(), s.GetSize(), pat.GetData(), pat.GetSize(), escape);
|
221
221
|
}
|
222
222
|
|
223
223
|
bool LikeFun::Glob(const char *string, idx_t slen, const char *pattern, idx_t plen, bool allow_question_mark) {
|
@@ -362,15 +362,14 @@ static char GetEscapeChar(string_t escape) {
|
|
362
362
|
if (escape.GetSize() > 1) {
|
363
363
|
throw SyntaxException("Invalid escape string. Escape string must be empty or one character.");
|
364
364
|
}
|
365
|
-
return escape.GetSize() == 0 ? '\0' : *escape.
|
365
|
+
return escape.GetSize() == 0 ? '\0' : *escape.GetData();
|
366
366
|
}
|
367
367
|
|
368
368
|
struct LikeEscapeOperator {
|
369
369
|
template <class TA, class TB, class TC>
|
370
370
|
static inline bool Operation(TA str, TB pattern, TC escape) {
|
371
371
|
char escape_char = GetEscapeChar(escape);
|
372
|
-
return LikeOperatorFunction(str.
|
373
|
-
escape_char);
|
372
|
+
return LikeOperatorFunction(str.GetData(), str.GetSize(), pattern.GetData(), pattern.GetSize(), escape_char);
|
374
373
|
}
|
375
374
|
};
|
376
375
|
|
@@ -389,9 +388,9 @@ struct LikeOperator {
|
|
389
388
|
};
|
390
389
|
|
391
390
|
bool ILikeOperatorFunction(string_t &str, string_t &pattern, char escape = '\0') {
|
392
|
-
auto str_data = str.
|
391
|
+
auto str_data = str.GetData();
|
393
392
|
auto str_size = str.GetSize();
|
394
|
-
auto pat_data = pattern.
|
393
|
+
auto pat_data = pattern.GetData();
|
395
394
|
auto pat_size = pattern.GetSize();
|
396
395
|
|
397
396
|
// lowercase both the str and the pattern
|
@@ -446,8 +445,8 @@ struct NotILikeOperator {
|
|
446
445
|
struct ILikeOperatorASCII {
|
447
446
|
template <class TA, class TB, class TR>
|
448
447
|
static inline TR Operation(TA str, TB pattern) {
|
449
|
-
return TemplatedLikeOperator<'%', '_', false, ASCIILCaseReader>(
|
450
|
-
|
448
|
+
return TemplatedLikeOperator<'%', '_', false, ASCIILCaseReader>(str.GetData(), str.GetSize(), pattern.GetData(),
|
449
|
+
pattern.GetSize(), '\0');
|
451
450
|
}
|
452
451
|
};
|
453
452
|
|
@@ -461,7 +460,7 @@ struct NotILikeOperatorASCII {
|
|
461
460
|
struct GlobOperator {
|
462
461
|
template <class TA, class TB, class TR>
|
463
462
|
static inline TR Operation(TA str, TB pattern) {
|
464
|
-
return LikeFun::Glob(str.
|
463
|
+
return LikeFun::Glob(str.GetData(), str.GetSize(), pattern.GetData(), pattern.GetSize());
|
465
464
|
}
|
466
465
|
};
|
467
466
|
|
@@ -18,8 +18,8 @@ static int64_t MismatchesScalarFunction(Vector &result, const string_t str, stri
|
|
18
18
|
}
|
19
19
|
|
20
20
|
idx_t mismatches = 0;
|
21
|
-
auto str_str = str.
|
22
|
-
auto tgt_str = tgt.
|
21
|
+
auto str_str = str.GetData();
|
22
|
+
auto tgt_str = tgt.GetData();
|
23
23
|
|
24
24
|
for (idx_t idx = 0; idx < str_len; ++idx) {
|
25
25
|
if (str_str[idx] != tgt_str[idx]) {
|
@@ -7,7 +7,7 @@ namespace duckdb {
|
|
7
7
|
struct NFCNormalizeOperator {
|
8
8
|
template <class INPUT_TYPE, class RESULT_TYPE>
|
9
9
|
static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
|
10
|
-
auto input_data = input.
|
10
|
+
auto input_data = input.GetData();
|
11
11
|
auto input_length = input.GetSize();
|
12
12
|
if (StripAccentsFun::IsAscii(input_data, input_length)) {
|
13
13
|
return input;
|
@@ -27,7 +27,7 @@ static pair<idx_t, idx_t> PadCountChars(const idx_t len, const char *data, const
|
|
27
27
|
|
28
28
|
static bool InsertPadding(const idx_t len, const string_t &pad, vector<char> &result) {
|
29
29
|
// Copy the padding until the output is long enough
|
30
|
-
auto data = pad.
|
30
|
+
auto data = pad.GetData();
|
31
31
|
auto size = pad.GetSize();
|
32
32
|
|
33
33
|
// Check whether we need data that we don't have
|
@@ -63,7 +63,7 @@ static string_t LeftPadFunction(const string_t &str, const int32_t len, const st
|
|
63
63
|
result.clear();
|
64
64
|
|
65
65
|
// Get information about the base string
|
66
|
-
auto data_str = str.
|
66
|
+
auto data_str = str.GetData();
|
67
67
|
auto size_str = str.GetSize();
|
68
68
|
|
69
69
|
// Count how much of str will fit in the output
|
@@ -92,7 +92,7 @@ static string_t RightPadFunction(const string_t &str, const int32_t len, const s
|
|
92
92
|
result.clear();
|
93
93
|
|
94
94
|
// Get information about the base string
|
95
|
-
auto data_str = str.
|
95
|
+
auto data_str = str.GetData();
|
96
96
|
auto size_str = str.GetSize();
|
97
97
|
|
98
98
|
// Count how much of str will fit in the output
|
@@ -46,8 +46,8 @@ static bool PrefixFunction(const string_t &str, const string_t &pattern) {
|
|
46
46
|
}
|
47
47
|
}
|
48
48
|
// compare the rest of the prefix
|
49
|
-
const char *str_data = str.
|
50
|
-
const char *patt_data = pattern.
|
49
|
+
const char *str_data = str.GetData();
|
50
|
+
const char *patt_data = pattern.GetData();
|
51
51
|
D_ASSERT(patt_length <= str_length);
|
52
52
|
for (idx_t i = string_t::PREFIX_LENGTH; i < patt_length; ++i) {
|
53
53
|
if (str_data[i] != patt_data[i]) {
|
@@ -138,7 +138,7 @@ static void PrintfFunction(DataChunk &args, ExpressionState &state, Vector &resu
|
|
138
138
|
case LogicalTypeId::VARCHAR: {
|
139
139
|
auto arg_data = FlatVector::GetData<string_t>(col);
|
140
140
|
auto string_view =
|
141
|
-
duckdb_fmt::basic_string_view<char>(arg_data[arg_idx].
|
141
|
+
duckdb_fmt::basic_string_view<char>(arg_data[arg_idx].GetData(), arg_data[arg_idx].GetSize());
|
142
142
|
format_args.emplace_back(duckdb_fmt::internal::make_arg<CTX>(string_view));
|
143
143
|
break;
|
144
144
|
}
|
@@ -86,7 +86,7 @@ void ExtractSingleTuple(const string_t &string, duckdb_re2::RE2 &pattern, int32_
|
|
86
86
|
idx_t child_idx = current_list_size;
|
87
87
|
if (match_group.empty()) {
|
88
88
|
// This group was not matched
|
89
|
-
list_content[child_idx] = string_t(string.
|
89
|
+
list_content[child_idx] = string_t(string.GetData(), 0);
|
90
90
|
if (match_group.begin() == nullptr) {
|
91
91
|
// This group is optional
|
92
92
|
child_validity.SetInvalid(child_idx);
|
@@ -94,9 +94,9 @@ void ExtractSingleTuple(const string_t &string, duckdb_re2::RE2 &pattern, int32_
|
|
94
94
|
} else {
|
95
95
|
// Every group is a substring of the original, we can find out the offset using the pointer
|
96
96
|
// the 'match_group' address is guaranteed to be bigger than that of the source
|
97
|
-
D_ASSERT((const char *)match_group.begin() >= string.
|
98
|
-
idx_t offset = match_group.begin() - string.
|
99
|
-
list_content[child_idx] = string_t(string.
|
97
|
+
D_ASSERT((const char *)match_group.begin() >= string.GetData());
|
98
|
+
idx_t offset = match_group.begin() - string.GetData();
|
99
|
+
list_content[child_idx] = string_t(string.GetData() + offset, match_group.size());
|
100
100
|
}
|
101
101
|
current_list_size++;
|
102
102
|
if (startpos > input.size()) {
|
@@ -10,7 +10,7 @@ namespace duckdb {
|
|
10
10
|
|
11
11
|
static string_t RepeatScalarFunction(const string_t &str, const int64_t cnt, vector<char> &result) {
|
12
12
|
// Get information about the repeated string
|
13
|
-
auto input_str = str.
|
13
|
+
auto input_str = str.GetData();
|
14
14
|
auto size_str = str.GetSize();
|
15
15
|
|
16
16
|
// Reuse the buffer
|
@@ -29,13 +29,13 @@ static idx_t NextNeedle(const char *input_haystack, idx_t size_haystack, const c
|
|
29
29
|
static string_t ReplaceScalarFunction(const string_t &haystack, const string_t &needle, const string_t &thread,
|
30
30
|
vector<char> &result) {
|
31
31
|
// Get information about the needle, the haystack and the "thread"
|
32
|
-
auto input_haystack = haystack.
|
32
|
+
auto input_haystack = haystack.GetData();
|
33
33
|
auto size_haystack = haystack.GetSize();
|
34
34
|
|
35
|
-
auto input_needle = needle.
|
35
|
+
auto input_needle = needle.GetData();
|
36
36
|
auto size_needle = needle.GetSize();
|
37
37
|
|
38
|
-
auto input_thread = thread.
|
38
|
+
auto input_thread = thread.GetData();
|
39
39
|
auto size_thread = thread.GetSize();
|
40
40
|
|
41
41
|
// Reuse the buffer
|
@@ -32,7 +32,7 @@ static void StrReverseUnicode(const char *input, idx_t n, char *output) {
|
|
32
32
|
struct ReverseOperator {
|
33
33
|
template <class INPUT_TYPE, class RESULT_TYPE>
|
34
34
|
static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
|
35
|
-
auto input_data = input.
|
35
|
+
auto input_data = input.GetData();
|
36
36
|
auto input_length = input.GetSize();
|
37
37
|
|
38
38
|
auto target = StringVector::EmptyString(result, input_length);
|
@@ -17,9 +17,9 @@ static bool StartsWith(const unsigned char *haystack, idx_t haystack_size, const
|
|
17
17
|
}
|
18
18
|
|
19
19
|
static bool StartsWith(const string_t &haystack_s, const string_t &needle_s) {
|
20
|
-
auto haystack = (const unsigned char *)haystack_s.
|
20
|
+
auto haystack = (const unsigned char *)haystack_s.GetData();
|
21
21
|
auto haystack_size = haystack_s.GetSize();
|
22
|
-
auto needle = (const unsigned char *)needle_s.
|
22
|
+
auto needle = (const unsigned char *)needle_s.GetData();
|
23
23
|
auto needle_size = needle_s.GetSize();
|
24
24
|
if (needle_size == 0) {
|
25
25
|
// empty needle: always true
|
@@ -68,9 +68,9 @@ struct RegexpStringSplit {
|
|
68
68
|
struct StringSplitter {
|
69
69
|
template <class OP>
|
70
70
|
static idx_t Split(string_t input, string_t delim, StringSplitInput &state, void *data) {
|
71
|
-
auto input_data = input.
|
71
|
+
auto input_data = input.GetData();
|
72
72
|
auto input_size = input.GetSize();
|
73
|
-
auto delim_data = delim.
|
73
|
+
auto delim_data = delim.GetData();
|
74
74
|
auto delim_size = delim.GetSize();
|
75
75
|
idx_t list_idx = 0;
|
76
76
|
while (input_size > 0) {
|
@@ -135,7 +135,7 @@ static void StringSplitExecutor(DataChunk &args, ExpressionState &state, Vector
|
|
135
135
|
StringSplitInput split_input(result, child_entry, total_splits);
|
136
136
|
if (!delim_data.validity.RowIsValid(delim_idx)) {
|
137
137
|
// delim is NULL: copy the complete entry
|
138
|
-
split_input.AddSplit(inputs[input_idx].
|
138
|
+
split_input.AddSplit(inputs[input_idx].GetData(), inputs[input_idx].GetSize(), 0);
|
139
139
|
list_struct_data[i].length = 1;
|
140
140
|
list_struct_data[i].offset = total_splits;
|
141
141
|
total_splits++;
|
@@ -17,12 +17,12 @@ bool StripAccentsFun::IsAscii(const char *input, idx_t n) {
|
|
17
17
|
struct StripAccentsOperator {
|
18
18
|
template <class INPUT_TYPE, class RESULT_TYPE>
|
19
19
|
static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
|
20
|
-
if (StripAccentsFun::IsAscii(input.
|
20
|
+
if (StripAccentsFun::IsAscii(input.GetData(), input.GetSize())) {
|
21
21
|
return input;
|
22
22
|
}
|
23
23
|
|
24
24
|
// non-ascii, perform collation
|
25
|
-
auto stripped = utf8proc_remove_accents((const utf8proc_uint8_t *)input.
|
25
|
+
auto stripped = utf8proc_remove_accents((const utf8proc_uint8_t *)input.GetData(), input.GetSize());
|
26
26
|
auto result_str = StringVector::AddString(result, (const char *)stripped);
|
27
27
|
free(stripped);
|
28
28
|
return result_str;
|
@@ -82,7 +82,7 @@ bool SubstringStartEnd(int64_t input_size, int64_t offset, int64_t length, int64
|
|
82
82
|
}
|
83
83
|
|
84
84
|
string_t SubstringASCII(Vector &result, string_t input, int64_t offset, int64_t length) {
|
85
|
-
auto input_data = input.
|
85
|
+
auto input_data = input.GetData();
|
86
86
|
auto input_size = input.GetSize();
|
87
87
|
|
88
88
|
AssertInSupportedRange(input_size, offset, length);
|
@@ -95,7 +95,7 @@ string_t SubstringASCII(Vector &result, string_t input, int64_t offset, int64_t
|
|
95
95
|
}
|
96
96
|
|
97
97
|
string_t SubstringFun::SubstringUnicode(Vector &result, string_t input, int64_t offset, int64_t length) {
|
98
|
-
auto input_data = input.
|
98
|
+
auto input_data = input.GetData();
|
99
99
|
auto input_size = input.GetSize();
|
100
100
|
|
101
101
|
AssertInSupportedRange(input_size, offset, length);
|
@@ -190,7 +190,7 @@ string_t SubstringFun::SubstringUnicode(Vector &result, string_t input, int64_t
|
|
190
190
|
}
|
191
191
|
|
192
192
|
string_t SubstringFun::SubstringGrapheme(Vector &result, string_t input, int64_t offset, int64_t length) {
|
193
|
-
auto input_data = input.
|
193
|
+
auto input_data = input.GetData();
|
194
194
|
auto input_size = input.GetSize();
|
195
195
|
|
196
196
|
AssertInSupportedRange(input_size, offset, length);
|
@@ -21,8 +21,8 @@ static bool SuffixFunction(const string_t &str, const string_t &suffix) {
|
|
21
21
|
return false;
|
22
22
|
}
|
23
23
|
|
24
|
-
auto suffix_data = suffix.
|
25
|
-
auto str_data = str.
|
24
|
+
auto suffix_data = suffix.GetData();
|
25
|
+
auto str_data = str.GetData();
|
26
26
|
int32_t suf_idx = suffix_size - 1;
|
27
27
|
idx_t str_idx = str_size - 1;
|
28
28
|
for (; suf_idx >= 0; --suf_idx, --str_idx) {
|
@@ -16,13 +16,13 @@ namespace duckdb {
|
|
16
16
|
static string_t TranslateScalarFunction(const string_t &haystack, const string_t &needle, const string_t &thread,
|
17
17
|
vector<char> &result) {
|
18
18
|
// Get information about the haystack, the needle and the "thread"
|
19
|
-
auto input_haystack = haystack.
|
19
|
+
auto input_haystack = haystack.GetData();
|
20
20
|
auto size_haystack = haystack.GetSize();
|
21
21
|
|
22
|
-
auto input_needle = needle.
|
22
|
+
auto input_needle = needle.GetData();
|
23
23
|
auto size_needle = needle.GetSize();
|
24
24
|
|
25
|
-
auto input_thread = thread.
|
25
|
+
auto input_thread = thread.GetData();
|
26
26
|
auto size_thread = thread.GetSize();
|
27
27
|
|
28
28
|
// Reuse the buffer
|
@@ -13,7 +13,7 @@ template <bool LTRIM, bool RTRIM>
|
|
13
13
|
struct TrimOperator {
|
14
14
|
template <class INPUT_TYPE, class RESULT_TYPE>
|
15
15
|
static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) {
|
16
|
-
auto data = input.
|
16
|
+
auto data = input.GetData();
|
17
17
|
auto size = input.GetSize();
|
18
18
|
|
19
19
|
utf8proc_int32_t codepoint;
|
@@ -64,7 +64,7 @@ static void UnaryTrimFunction(DataChunk &args, ExpressionState &state, Vector &r
|
|
64
64
|
}
|
65
65
|
|
66
66
|
static void GetIgnoredCodepoints(string_t ignored, unordered_set<utf8proc_int32_t> &ignored_codepoints) {
|
67
|
-
auto dataptr = (utf8proc_uint8_t *)ignored.
|
67
|
+
auto dataptr = (utf8proc_uint8_t *)ignored.GetData();
|
68
68
|
auto size = ignored.GetSize();
|
69
69
|
idx_t pos = 0;
|
70
70
|
while (pos < size) {
|
@@ -78,7 +78,7 @@ template <bool LTRIM, bool RTRIM>
|
|
78
78
|
static void BinaryTrimFunction(DataChunk &input, ExpressionState &state, Vector &result) {
|
79
79
|
BinaryExecutor::Execute<string_t, string_t, string_t>(
|
80
80
|
input.data[0], input.data[1], result, input.size(), [&](string_t input, string_t ignored) {
|
81
|
-
auto data = input.
|
81
|
+
auto data = input.GetData();
|
82
82
|
auto size = input.GetSize();
|
83
83
|
|
84
84
|
unordered_set<utf8proc_int32_t> ignored_codepoints;
|
@@ -62,7 +62,7 @@ static unique_ptr<FunctionData> StructExtractBind(ClientContext &context, Scalar
|
|
62
62
|
if (key_child->return_type.id() != LogicalTypeId::VARCHAR || !key_child->IsFoldable()) {
|
63
63
|
throw BinderException("Key name for struct_extract needs to be a constant string");
|
64
64
|
}
|
65
|
-
Value key_val = ExpressionExecutor::EvaluateScalar(context, *key_child
|
65
|
+
Value key_val = ExpressionExecutor::EvaluateScalar(context, *key_child);
|
66
66
|
D_ASSERT(key_val.type().id() == LogicalTypeId::VARCHAR);
|
67
67
|
auto &key_str = StringValue::Get(key_val);
|
68
68
|
if (key_val.IsNull() || key_str.empty()) {
|
@@ -91,7 +91,7 @@ static void AggregateStateFinalize(DataChunk &input, ExpressionState &state_p, V
|
|
91
91
|
|
92
92
|
if (state_data.validity.RowIsValid(state_idx)) {
|
93
93
|
D_ASSERT(state_entry->GetSize() == bind_data.state_size);
|
94
|
-
memcpy((void *)target_ptr, state_entry->
|
94
|
+
memcpy((void *)target_ptr, state_entry->GetData(), bind_data.state_size);
|
95
95
|
} else {
|
96
96
|
// create a dummy state because finalize does not understand NULLs in its input
|
97
97
|
// we put the NULL back in explicitly below
|
@@ -145,13 +145,11 @@ static void AggregateStateCombine(DataChunk &input, ExpressionState &state_p, Ve
|
|
145
145
|
continue;
|
146
146
|
}
|
147
147
|
if (state0_data.validity.RowIsValid(state0_idx) && !state1_data.validity.RowIsValid(state1_idx)) {
|
148
|
-
result_ptr[i] =
|
149
|
-
StringVector::AddStringOrBlob(result, (const char *)state0.GetDataUnsafe(), bind_data.state_size);
|
148
|
+
result_ptr[i] = StringVector::AddStringOrBlob(result, (const char *)state0.GetData(), bind_data.state_size);
|
150
149
|
continue;
|
151
150
|
}
|
152
151
|
if (!state0_data.validity.RowIsValid(state0_idx) && state1_data.validity.RowIsValid(state1_idx)) {
|
153
|
-
result_ptr[i] =
|
154
|
-
StringVector::AddStringOrBlob(result, (const char *)state1.GetDataUnsafe(), bind_data.state_size);
|
152
|
+
result_ptr[i] = StringVector::AddStringOrBlob(result, (const char *)state1.GetData(), bind_data.state_size);
|
155
153
|
continue;
|
156
154
|
}
|
157
155
|
|
@@ -161,8 +159,8 @@ static void AggregateStateCombine(DataChunk &input, ExpressionState &state_p, Ve
|
|
161
159
|
state0.GetSize(), state1.GetSize());
|
162
160
|
}
|
163
161
|
|
164
|
-
memcpy(local_state.state_buffer0.get(), state0.
|
165
|
-
memcpy(local_state.state_buffer1.get(), state1.
|
162
|
+
memcpy(local_state.state_buffer0.get(), state0.GetData(), bind_data.state_size);
|
163
|
+
memcpy(local_state.state_buffer1.get(), state1.GetData(), bind_data.state_size);
|
166
164
|
|
167
165
|
AggregateInputData aggr_input_data(nullptr, Allocator::DefaultAllocator());
|
168
166
|
bind_data.aggr.combine(local_state.state_vector0, local_state.state_vector1, aggr_input_data, 1);
|
@@ -60,7 +60,7 @@ static unique_ptr<FunctionData> UnionExtractBind(ClientContext &context, ScalarF
|
|
60
60
|
if (key_child->return_type.id() != LogicalTypeId::VARCHAR || !key_child->IsFoldable()) {
|
61
61
|
throw BinderException("Key name for union_extract needs to be a constant string");
|
62
62
|
}
|
63
|
-
Value key_val = ExpressionExecutor::EvaluateScalar(context, *key_child
|
63
|
+
Value key_val = ExpressionExecutor::EvaluateScalar(context, *key_child);
|
64
64
|
D_ASSERT(key_val.type().id() == LogicalTypeId::VARCHAR);
|
65
65
|
auto &key_str = StringValue::Get(key_val);
|
66
66
|
if (key_val.IsNull() || key_str.empty()) {
|
@@ -357,7 +357,7 @@ static void WriteCSVSink(ExecutionContext &context, FunctionData &bind_data, Glo
|
|
357
357
|
// FIXME: we could gain some performance here by checking for certain types if they ever require quotes
|
358
358
|
// (e.g. integers only require quotes if the delimiter is a number, decimals only require quotes if the
|
359
359
|
// delimiter is a number or "." character)
|
360
|
-
WriteQuotedString(writer, csv_data, str_value.
|
360
|
+
WriteQuotedString(writer, csv_data, str_value.GetData(), str_value.GetSize(),
|
361
361
|
csv_data.options.force_quote[col_idx]);
|
362
362
|
}
|
363
363
|
writer.WriteBufferData(csv_data.newline);
|
@@ -239,7 +239,7 @@ struct MacroExtractor {
|
|
239
239
|
|
240
240
|
static Value GetMacroDefinition(ScalarMacroCatalogEntry &entry, idx_t offset) {
|
241
241
|
D_ASSERT(entry.function->type == MacroType::SCALAR_MACRO);
|
242
|
-
auto &func =
|
242
|
+
auto &func = entry.function->Cast<ScalarMacroFunction>();
|
243
243
|
return func.expression->ToString();
|
244
244
|
}
|
245
245
|
|
@@ -295,7 +295,7 @@ struct TableMacroExtractor {
|
|
295
295
|
|
296
296
|
static Value GetMacroDefinition(TableMacroCatalogEntry &entry, idx_t offset) {
|
297
297
|
if (entry.function->type == MacroType::SCALAR_MACRO) {
|
298
|
-
auto &func =
|
298
|
+
auto &func = entry.function->Cast<ScalarMacroFunction>();
|
299
299
|
return func.expression->ToString();
|
300
300
|
}
|
301
301
|
return Value();
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.7.2-
|
2
|
+
#define DUCKDB_VERSION "0.7.2-dev2410"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "e413e0b40d"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -26,7 +26,7 @@ public:
|
|
26
26
|
}
|
27
27
|
void Add(const char *data);
|
28
28
|
void Add(string_t string) {
|
29
|
-
MD5Update((const_data_ptr_t)string.
|
29
|
+
MD5Update((const_data_ptr_t)string.GetData(), string.GetSize());
|
30
30
|
}
|
31
31
|
void Add(const string &data) {
|
32
32
|
MD5Update((const_data_ptr_t)data.c_str(), data.size());
|
@@ -0,0 +1,17 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/common/enums/debug_initialize.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/constants.hpp"
|
12
|
+
|
13
|
+
namespace duckdb {
|
14
|
+
|
15
|
+
enum class DebugInitialize : uint8_t { NO_INITIALIZE = 0, DEBUG_ZERO_INITIALIZE = 1, DEBUG_ONE_INITIALIZE = 2 };
|
16
|
+
|
17
|
+
} // namespace duckdb
|
@@ -17,4 +17,12 @@ enum class OrderType : uint8_t { INVALID = 0, ORDER_DEFAULT = 1, ASCENDING = 2,
|
|
17
17
|
|
18
18
|
enum class OrderByNullType : uint8_t { INVALID = 0, ORDER_DEFAULT = 1, NULLS_FIRST = 2, NULLS_LAST = 3 };
|
19
19
|
|
20
|
+
enum class DefaultOrderByNullType : uint8_t {
|
21
|
+
INVALID = 0,
|
22
|
+
NULLS_FIRST = 2,
|
23
|
+
NULLS_LAST = 3,
|
24
|
+
NULLS_FIRST_ON_ASC_LAST_ON_DESC = 4,
|
25
|
+
NULLS_LAST_ON_ASC_FIRST_ON_DESC = 5
|
26
|
+
};
|
27
|
+
|
20
28
|
} // namespace duckdb
|
@@ -9,6 +9,7 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/constants.hpp"
|
12
|
+
#include "duckdb/common/enums/debug_initialize.hpp"
|
12
13
|
|
13
14
|
namespace duckdb {
|
14
15
|
class Allocator;
|
@@ -62,6 +63,8 @@ public:
|
|
62
63
|
|
63
64
|
MemoryRequirement CalculateMemory(uint64_t user_size);
|
64
65
|
|
66
|
+
void Initialize(DebugInitialize info);
|
67
|
+
|
65
68
|
protected:
|
66
69
|
//! The pointer to the internal buffer that will be read or written, including the buffer header
|
67
70
|
data_ptr_t internal_buffer;
|
@@ -50,7 +50,7 @@ public:
|
|
50
50
|
|
51
51
|
static inline void EncodeStringDataPrefix(data_ptr_t dataptr, string_t value, idx_t prefix_len) {
|
52
52
|
auto len = value.GetSize();
|
53
|
-
memcpy(dataptr, value.
|
53
|
+
memcpy(dataptr, value.GetData(), MinValue(len, prefix_len));
|
54
54
|
if (len < prefix_len) {
|
55
55
|
memset(dataptr + len, '\0', prefix_len - len);
|
56
56
|
}
|
@@ -49,6 +49,9 @@ public:
|
|
49
49
|
//! Select using a cutoff on the radix bits of the hash
|
50
50
|
static idx_t Select(Vector &hashes, const SelectionVector *sel, idx_t count, idx_t radix_bits, idx_t cutoff,
|
51
51
|
SelectionVector *true_sel, SelectionVector *false_sel);
|
52
|
+
|
53
|
+
//! Convert hashes to bins
|
54
|
+
static void HashesToBins(Vector &hashes, idx_t radix_bits, Vector &bins, idx_t count);
|
52
55
|
};
|
53
56
|
|
54
57
|
//! Templated radix partitioning constants, can be templated to the number of radix bits
|
@@ -25,6 +25,8 @@ public:
|
|
25
25
|
PartitionGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
|
26
26
|
const Types &payload_types, bool external);
|
27
27
|
|
28
|
+
int ComparePartitions(const SBIterator &left, const SBIterator &right) const;
|
29
|
+
|
28
30
|
void ComputeMasks(ValidityMask &partition_mask, ValidityMask &order_mask);
|
29
31
|
|
30
32
|
GlobalSortStatePtr global_sort;
|
@@ -43,8 +45,12 @@ public:
|
|
43
45
|
using GroupingPartition = unique_ptr<PartitionedColumnData>;
|
44
46
|
using GroupingAppend = unique_ptr<PartitionedColumnDataAppendState>;
|
45
47
|
|
46
|
-
|
47
|
-
|
48
|
+
static void GenerateOrderings(Orders &partitions, Orders &orders,
|
49
|
+
const vector<unique_ptr<Expression>> &partition_bys, const Orders &order_bys,
|
50
|
+
const vector<unique_ptr<BaseStatistics>> &partitions_stats);
|
51
|
+
|
52
|
+
PartitionGlobalSinkState(ClientContext &context, const vector<unique_ptr<Expression>> &partition_bys,
|
53
|
+
const vector<BoundOrderByNode> &order_bys, const Types &payload_types,
|
48
54
|
const vector<unique_ptr<BaseStatistics>> &partitions_stats, idx_t estimated_cardinality);
|
49
55
|
|
50
56
|
void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
|
@@ -68,6 +74,8 @@ public:
|
|
68
74
|
const Types payload_types;
|
69
75
|
vector<HashGroupPtr> hash_groups;
|
70
76
|
bool external;
|
77
|
+
// Reverse lookup from hash bins to non-empty hash groups
|
78
|
+
vector<size_t> bin_groups;
|
71
79
|
|
72
80
|
// OVER() (no sorting)
|
73
81
|
unique_ptr<RowDataCollection> rows;
|
@@ -121,7 +129,7 @@ class PartitionGlobalMergeState {
|
|
121
129
|
public:
|
122
130
|
using GroupDataPtr = unique_ptr<ColumnDataCollection>;
|
123
131
|
|
124
|
-
|
132
|
+
PartitionGlobalMergeState(PartitionGlobalSinkState &sink, GroupDataPtr group_data, hash_t hash_bin);
|
125
133
|
|
126
134
|
bool IsSorted() const {
|
127
135
|
lock_guard<mutex> guard(lock);
|
@@ -187,61 +195,4 @@ public:
|
|
187
195
|
void Schedule() override;
|
188
196
|
};
|
189
197
|
|
190
|
-
class PartitionGlobalSourceState {
|
191
|
-
public:
|
192
|
-
explicit PartitionGlobalSourceState(PartitionGlobalSinkState &gsink_p) : gsink(gsink_p), next_bin(0) {
|
193
|
-
}
|
194
|
-
|
195
|
-
PartitionGlobalSinkState &gsink;
|
196
|
-
//! The output read position.
|
197
|
-
atomic<idx_t> next_bin;
|
198
|
-
|
199
|
-
public:
|
200
|
-
idx_t MaxThreads() {
|
201
|
-
// If there is only one partition, we have to process it on one thread.
|
202
|
-
if (!gsink.grouping_data) {
|
203
|
-
return 1;
|
204
|
-
}
|
205
|
-
|
206
|
-
// If there is not a lot of data, process serially.
|
207
|
-
if (gsink.count < STANDARD_ROW_GROUPS_SIZE) {
|
208
|
-
return 1;
|
209
|
-
}
|
210
|
-
|
211
|
-
return gsink.hash_groups.size();
|
212
|
-
}
|
213
|
-
};
|
214
|
-
|
215
|
-
// Per-thread read state
|
216
|
-
class PartitionLocalSourceState {
|
217
|
-
public:
|
218
|
-
using HashGroupPtr = unique_ptr<PartitionGlobalHashGroup>;
|
219
|
-
|
220
|
-
explicit PartitionLocalSourceState(PartitionGlobalSinkState &gstate_p);
|
221
|
-
|
222
|
-
void MaterializeSortedData();
|
223
|
-
idx_t GeneratePartition(const idx_t hash_bin);
|
224
|
-
|
225
|
-
PartitionGlobalSinkState &gstate;
|
226
|
-
|
227
|
-
//! The read partition
|
228
|
-
idx_t hash_bin;
|
229
|
-
HashGroupPtr hash_group;
|
230
|
-
|
231
|
-
//! The generated input chunks
|
232
|
-
unique_ptr<RowDataCollection> rows;
|
233
|
-
unique_ptr<RowDataCollection> heap;
|
234
|
-
RowLayout layout;
|
235
|
-
//! The partition boundary mask
|
236
|
-
vector<validity_t> partition_bits;
|
237
|
-
ValidityMask partition_mask;
|
238
|
-
//! The order boundary mask
|
239
|
-
vector<validity_t> order_bits;
|
240
|
-
ValidityMask order_mask;
|
241
|
-
//! The read cursor
|
242
|
-
unique_ptr<RowDataCollectionScanner> scanner;
|
243
|
-
//! Buffer for the inputs
|
244
|
-
DataChunk input_chunk;
|
245
|
-
};
|
246
|
-
|
247
198
|
} // namespace duckdb
|