duckdb 0.7.2-dev2995.0 → 0.7.2-dev3117.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
- package/src/duckdb/extension/json/include/json_serializer.hpp +8 -1
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +1 -3
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -3
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +3 -2
- package/src/duckdb/extension/parquet/parquet-extension.cpp +9 -7
- package/src/duckdb/src/common/enum_util.cpp +5908 -0
- package/src/duckdb/src/common/enums/expression_type.cpp +216 -4
- package/src/duckdb/src/common/enums/join_type.cpp +6 -5
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/exception.cpp +1 -1
- package/src/duckdb/src/common/exception_format_value.cpp +2 -2
- package/src/duckdb/src/common/multi_file_reader.cpp +14 -0
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +143 -0
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +160 -0
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +3 -3
- package/src/duckdb/src/common/types.cpp +11 -10
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +4 -4
- package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +2 -1
- package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +2 -3
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +2 -2
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -2
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +2 -1
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -1
- package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +165 -0
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +9 -7
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +25 -4
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -1
- package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +5 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +85 -29
- package/src/duckdb/src/function/table/read_csv.cpp +17 -11
- package/src/duckdb/src/function/table/system/duckdb_settings.cpp +2 -1
- package/src/duckdb/src/function/table/system/duckdb_types.cpp +2 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +958 -0
- package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +4 -4
- package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +44 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +93 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +92 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/vector.hpp +61 -14
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +68 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -3
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +32 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -2
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/database.hpp +1 -3
- package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +24 -0
- package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +1 -1
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +26 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/main/client_context.cpp +1 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/database.cpp +11 -23
- package/src/duckdb/src/main/database_path_and_type.cpp +23 -0
- package/src/duckdb/src/main/relation/join_relation.cpp +2 -1
- package/src/duckdb/src/main/relation/setop_relation.cpp +2 -3
- package/src/duckdb/src/parser/expression/window_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +2 -2
- package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
- package/src/duckdb/src/parser/result_modifier.cpp +2 -2
- package/src/duckdb/src/parser/statement/select_statement.cpp +0 -44
- package/src/duckdb/src/parser/tableref/joinref.cpp +3 -3
- package/src/duckdb/src/parser/tableref.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -3
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +6 -0
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +4 -1
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +7 -0
- package/src/duckdb/src/planner/operator/logical_aggregate.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +20 -0
- package/src/duckdb/src/verification/statement_verifier.cpp +3 -0
- package/src/duckdb/ub_src_common.cpp +2 -2
- package/src/duckdb/ub_src_common_serializer.cpp +4 -2
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
- package/src/duckdb/ub_src_main.cpp +2 -0
- package/src/duckdb/src/common/serializer/enum_serializer.cpp +0 -1180
- package/src/duckdb/src/common/vector.cpp +0 -12
- package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +0 -113
@@ -29,13 +29,13 @@ enum class JoinType : uint8_t {
|
|
29
29
|
// (and NULL if no partner is found)
|
30
30
|
};
|
31
31
|
|
32
|
-
//! Convert join type to string
|
33
|
-
DUCKDB_API string JoinTypeToString(JoinType type);
|
34
|
-
|
35
32
|
//! True if join is left or full outer join
|
36
33
|
bool IsLeftOuterJoin(JoinType type);
|
37
34
|
|
38
35
|
//! True if join is right or full outer join
|
39
36
|
bool IsRightOuterJoin(JoinType type);
|
40
37
|
|
38
|
+
// **DEPRECATED**: Use EnumUtil directly instead.
|
39
|
+
string JoinTypeToString(JoinType type);
|
40
|
+
|
41
41
|
} // namespace duckdb
|
@@ -10,11 +10,11 @@
|
|
10
10
|
|
11
11
|
#include "duckdb/common/assert.hpp"
|
12
12
|
#include "duckdb/common/exception_format_value.hpp"
|
13
|
-
#include "duckdb/common/vector.hpp"
|
14
13
|
#include "duckdb/common/shared_ptr.hpp"
|
15
14
|
#include "duckdb/common/map.hpp"
|
16
15
|
#include "duckdb/common/typedefs.hpp"
|
17
16
|
|
17
|
+
#include <vector>
|
18
18
|
#include <stdexcept>
|
19
19
|
|
20
20
|
namespace duckdb {
|
@@ -105,14 +105,14 @@ public:
|
|
105
105
|
|
106
106
|
template <typename... Args>
|
107
107
|
static string ConstructMessage(const string &msg, Args... params) {
|
108
|
-
vector<ExceptionFormatValue> values;
|
108
|
+
std::vector<ExceptionFormatValue> values;
|
109
109
|
return ConstructMessageRecursive(msg, values, params...);
|
110
110
|
}
|
111
111
|
|
112
|
-
DUCKDB_API static string ConstructMessageRecursive(const string &msg, vector<ExceptionFormatValue> &values);
|
112
|
+
DUCKDB_API static string ConstructMessageRecursive(const string &msg, std::vector<ExceptionFormatValue> &values);
|
113
113
|
|
114
114
|
template <class T, typename... Args>
|
115
|
-
static string ConstructMessageRecursive(const string &msg, vector<ExceptionFormatValue> &values, T param,
|
115
|
+
static string ConstructMessageRecursive(const string &msg, std::vector<ExceptionFormatValue> &values, T param,
|
116
116
|
Args... params) {
|
117
117
|
values.push_back(ExceptionFormatValue::CreateFormatValue<T>(param));
|
118
118
|
return ConstructMessageRecursive(msg, values, params...);
|
@@ -10,7 +10,8 @@
|
|
10
10
|
|
11
11
|
#include "duckdb/common/string.hpp"
|
12
12
|
#include "duckdb/common/hugeint.hpp"
|
13
|
-
|
13
|
+
|
14
|
+
#include <vector>
|
14
15
|
|
15
16
|
namespace duckdb {
|
16
17
|
|
@@ -40,7 +41,7 @@ public:
|
|
40
41
|
static ExceptionFormatValue CreateFormatValue(T value) {
|
41
42
|
return int64_t(value);
|
42
43
|
}
|
43
|
-
static string Format(const string &msg, vector<ExceptionFormatValue> &values);
|
44
|
+
static string Format(const string &msg, std::vector<ExceptionFormatValue> &values);
|
44
45
|
};
|
45
46
|
|
46
47
|
template <>
|
@@ -9,6 +9,8 @@
|
|
9
9
|
#pragma once
|
10
10
|
|
11
11
|
#include "duckdb/common/types.hpp"
|
12
|
+
#include "duckdb/common/hive_partitioning.hpp"
|
13
|
+
#include "re2/re2.h"
|
12
14
|
|
13
15
|
namespace duckdb {
|
14
16
|
class Serializer;
|
@@ -18,11 +20,53 @@ struct BindInfo;
|
|
18
20
|
struct MultiFileReaderOptions {
|
19
21
|
bool filename = false;
|
20
22
|
bool hive_partitioning = false;
|
23
|
+
bool auto_detect_hive_partitioning = true;
|
21
24
|
bool union_by_name = false;
|
22
25
|
|
23
26
|
DUCKDB_API void Serialize(Serializer &serializer) const;
|
24
27
|
DUCKDB_API static MultiFileReaderOptions Deserialize(Deserializer &source);
|
25
28
|
DUCKDB_API void AddBatchInfo(BindInfo &bind_info) const;
|
29
|
+
|
30
|
+
static bool AutoDetectHivePartitioning(const vector<string> &files) {
|
31
|
+
if (files.empty()) {
|
32
|
+
return false;
|
33
|
+
}
|
34
|
+
|
35
|
+
std::unordered_set<string> uset;
|
36
|
+
idx_t splits_size;
|
37
|
+
{
|
38
|
+
// front file
|
39
|
+
auto splits = StringUtil::Split(files.front(), FileSystem::PathSeparator());
|
40
|
+
splits_size = splits.size();
|
41
|
+
if (splits.size() < 2) {
|
42
|
+
return false;
|
43
|
+
}
|
44
|
+
for (auto it = splits.begin(); it != std::prev(splits.end()); it++) {
|
45
|
+
auto part = StringUtil::Split(*it, "=");
|
46
|
+
if (part.size() == 2) {
|
47
|
+
uset.insert(part.front());
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
if (uset.empty()) {
|
52
|
+
return false;
|
53
|
+
}
|
54
|
+
for (auto &file : files) {
|
55
|
+
auto splits = StringUtil::Split(file, FileSystem::PathSeparator());
|
56
|
+
if (splits.size() != splits_size) {
|
57
|
+
return false;
|
58
|
+
}
|
59
|
+
for (auto it = splits.begin(); it != std::prev(splits.end()); it++) {
|
60
|
+
auto part = StringUtil::Split(*it, "=");
|
61
|
+
if (part.size() == 2) {
|
62
|
+
if (uset.find(part.front()) == uset.end()) {
|
63
|
+
return false;
|
64
|
+
}
|
65
|
+
}
|
66
|
+
}
|
67
|
+
}
|
68
|
+
return true;
|
69
|
+
}
|
26
70
|
};
|
27
71
|
|
28
72
|
} // namespace duckdb
|
@@ -0,0 +1,93 @@
|
|
1
|
+
#pragma once
|
2
|
+
#include "duckdb/common/serializer/format_deserializer.hpp"
|
3
|
+
|
4
|
+
namespace duckdb {
|
5
|
+
|
6
|
+
class BinaryDeserializer : public FormatDeserializer {
|
7
|
+
public:
|
8
|
+
template <class T>
|
9
|
+
static unique_ptr<T> Deserialize(data_ptr_t ptr, idx_t length) {
|
10
|
+
BinaryDeserializer deserializer(ptr, length);
|
11
|
+
deserializer.OnObjectBegin();
|
12
|
+
auto result = T::FormatDeserialize(deserializer);
|
13
|
+
deserializer.OnObjectEnd();
|
14
|
+
return result;
|
15
|
+
}
|
16
|
+
|
17
|
+
private:
|
18
|
+
explicit BinaryDeserializer(data_ptr_t ptr, idx_t length) : ptr(ptr), end_ptr(ptr + length) {
|
19
|
+
deserialize_enum_from_string = false;
|
20
|
+
}
|
21
|
+
struct State {
|
22
|
+
uint32_t expected_field_count;
|
23
|
+
idx_t expected_size;
|
24
|
+
uint32_t read_field_count;
|
25
|
+
State(uint32_t expected_field_count, idx_t expected_size)
|
26
|
+
: expected_field_count(expected_field_count), expected_size(expected_size), read_field_count(0) {
|
27
|
+
}
|
28
|
+
};
|
29
|
+
|
30
|
+
const char *current_tag = nullptr;
|
31
|
+
data_ptr_t ptr;
|
32
|
+
data_ptr_t end_ptr;
|
33
|
+
vector<State> stack;
|
34
|
+
|
35
|
+
template <class T>
|
36
|
+
T ReadPrimitive() {
|
37
|
+
T value;
|
38
|
+
ReadData((data_ptr_t)&value, sizeof(T));
|
39
|
+
return value;
|
40
|
+
}
|
41
|
+
|
42
|
+
void ReadData(data_ptr_t buffer, idx_t read_size) {
|
43
|
+
if (ptr + read_size > end_ptr) {
|
44
|
+
throw SerializationException("Failed to deserialize: not enough data in buffer to fulfill read request");
|
45
|
+
}
|
46
|
+
memcpy(buffer, ptr, read_size);
|
47
|
+
ptr += read_size;
|
48
|
+
}
|
49
|
+
|
50
|
+
// Set the 'tag' of the property to read
|
51
|
+
void SetTag(const char *tag) final;
|
52
|
+
|
53
|
+
//===--------------------------------------------------------------------===//
|
54
|
+
// Nested Types Hooks
|
55
|
+
//===--------------------------------------------------------------------===//
|
56
|
+
void OnObjectBegin() final;
|
57
|
+
void OnObjectEnd() final;
|
58
|
+
idx_t OnListBegin() final;
|
59
|
+
void OnListEnd() final;
|
60
|
+
idx_t OnMapBegin() final;
|
61
|
+
void OnMapEnd() final;
|
62
|
+
void OnMapEntryBegin() final;
|
63
|
+
void OnMapEntryEnd() final;
|
64
|
+
void OnMapKeyBegin() final;
|
65
|
+
void OnMapValueBegin() final;
|
66
|
+
bool OnOptionalBegin() final;
|
67
|
+
|
68
|
+
void OnPairBegin() final;
|
69
|
+
void OnPairKeyBegin() final;
|
70
|
+
void OnPairValueBegin() final;
|
71
|
+
void OnPairEnd() final;
|
72
|
+
|
73
|
+
//===--------------------------------------------------------------------===//
|
74
|
+
// Primitive Types
|
75
|
+
//===--------------------------------------------------------------------===//
|
76
|
+
bool ReadBool() final;
|
77
|
+
int8_t ReadSignedInt8() final;
|
78
|
+
uint8_t ReadUnsignedInt8() final;
|
79
|
+
int16_t ReadSignedInt16() final;
|
80
|
+
uint16_t ReadUnsignedInt16() final;
|
81
|
+
int32_t ReadSignedInt32() final;
|
82
|
+
uint32_t ReadUnsignedInt32() final;
|
83
|
+
int64_t ReadSignedInt64() final;
|
84
|
+
uint64_t ReadUnsignedInt64() final;
|
85
|
+
float ReadFloat() final;
|
86
|
+
double ReadDouble() final;
|
87
|
+
string ReadString() final;
|
88
|
+
interval_t ReadInterval() final;
|
89
|
+
hugeint_t ReadHugeInt() final;
|
90
|
+
void ReadDataPtr(data_ptr_t &ptr, idx_t count) final;
|
91
|
+
};
|
92
|
+
|
93
|
+
} // namespace duckdb
|
@@ -0,0 +1,92 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/common/serializer/format_serializer.hpp"
|
4
|
+
|
5
|
+
namespace duckdb {
|
6
|
+
|
7
|
+
struct BinarySerializer : public FormatSerializer {
|
8
|
+
|
9
|
+
private:
|
10
|
+
struct State {
|
11
|
+
// how many fields are present in the object
|
12
|
+
uint32_t field_count;
|
13
|
+
// the size of the object
|
14
|
+
uint64_t size;
|
15
|
+
// the offset of the object start in the buffer
|
16
|
+
uint64_t offset;
|
17
|
+
};
|
18
|
+
|
19
|
+
const char *current_tag;
|
20
|
+
|
21
|
+
vector<data_t> data;
|
22
|
+
vector<State> stack;
|
23
|
+
|
24
|
+
template <class T>
|
25
|
+
void Write(T element) {
|
26
|
+
static_assert(std::is_trivially_destructible<T>(), "Write element must be trivially destructible");
|
27
|
+
WriteData((const_data_ptr_t)&element, sizeof(T));
|
28
|
+
}
|
29
|
+
void WriteData(const_data_ptr_t buffer, idx_t write_size) {
|
30
|
+
data.insert(data.end(), buffer, buffer + write_size);
|
31
|
+
stack.back().size += write_size;
|
32
|
+
}
|
33
|
+
|
34
|
+
explicit BinarySerializer() {
|
35
|
+
serialize_enum_as_string = false;
|
36
|
+
}
|
37
|
+
|
38
|
+
public:
|
39
|
+
template <class T>
|
40
|
+
static vector<data_t> Serialize(T &obj) {
|
41
|
+
BinarySerializer serializer;
|
42
|
+
serializer.OnObjectBegin();
|
43
|
+
obj.FormatSerialize(serializer);
|
44
|
+
serializer.OnObjectEnd();
|
45
|
+
return std::move(serializer.data);
|
46
|
+
}
|
47
|
+
|
48
|
+
void SetTag(const char *tag) final;
|
49
|
+
|
50
|
+
//===--------------------------------------------------------------------===//
|
51
|
+
// Nested Types Hooks
|
52
|
+
//===--------------------------------------------------------------------===//
|
53
|
+
void OnOptionalBegin(bool present) final;
|
54
|
+
void OnListBegin(idx_t count) final;
|
55
|
+
void OnListEnd(idx_t count) final;
|
56
|
+
void OnMapBegin(idx_t count) final;
|
57
|
+
void OnMapEntryBegin() final;
|
58
|
+
void OnMapEntryEnd() final;
|
59
|
+
void OnMapKeyBegin() final;
|
60
|
+
void OnMapValueBegin() final;
|
61
|
+
void OnMapEnd(idx_t count) final;
|
62
|
+
void OnObjectBegin() final;
|
63
|
+
void OnObjectEnd() final;
|
64
|
+
void OnPairBegin() final;
|
65
|
+
void OnPairKeyBegin() final;
|
66
|
+
void OnPairValueBegin() final;
|
67
|
+
void OnPairEnd() final;
|
68
|
+
|
69
|
+
//===--------------------------------------------------------------------===//
|
70
|
+
// Primitive Types
|
71
|
+
//===--------------------------------------------------------------------===//
|
72
|
+
void WriteNull() final;
|
73
|
+
void WriteValue(uint8_t value) final;
|
74
|
+
void WriteValue(int8_t value) final;
|
75
|
+
void WriteValue(uint16_t value) final;
|
76
|
+
void WriteValue(int16_t value) final;
|
77
|
+
void WriteValue(uint32_t value) final;
|
78
|
+
void WriteValue(int32_t value) final;
|
79
|
+
void WriteValue(uint64_t value) final;
|
80
|
+
void WriteValue(int64_t value) final;
|
81
|
+
void WriteValue(hugeint_t value) final;
|
82
|
+
void WriteValue(float value) final;
|
83
|
+
void WriteValue(double value) final;
|
84
|
+
void WriteValue(interval_t value) final;
|
85
|
+
void WriteValue(const string_t value) final;
|
86
|
+
void WriteValue(const string &value) final;
|
87
|
+
void WriteValue(const char *value) final;
|
88
|
+
void WriteValue(bool value) final;
|
89
|
+
void WriteDataPtr(const_data_ptr_t ptr, idx_t count) final;
|
90
|
+
};
|
91
|
+
|
92
|
+
} // namespace duckdb
|
@@ -10,7 +10,7 @@
|
|
10
10
|
|
11
11
|
#include "duckdb/common/field_writer.hpp"
|
12
12
|
#include "duckdb/common/serializer.hpp"
|
13
|
-
#include "duckdb/common/serializer/enum_serializer.hpp"
|
13
|
+
#include "duckdb/common/enum_util.hpp"
|
14
14
|
#include "duckdb/common/serializer/serialization_traits.hpp"
|
15
15
|
#include "duckdb/common/types/interval.hpp"
|
16
16
|
#include "duckdb/common/types/string_type.hpp"
|
@@ -294,8 +294,12 @@ private:
|
|
294
294
|
// Deserialize an Enum
|
295
295
|
template <typename T = void>
|
296
296
|
inline typename std::enable_if<std::is_enum<T>::value, T>::type Read() {
|
297
|
-
|
298
|
-
|
297
|
+
if (deserialize_enum_from_string) {
|
298
|
+
auto str = ReadString();
|
299
|
+
return EnumUtil::FromString<T>(str.c_str());
|
300
|
+
} else {
|
301
|
+
return (T)Read<typename std::underlying_type<T>::type>();
|
302
|
+
}
|
299
303
|
}
|
300
304
|
|
301
305
|
// Deserialize an interval_t
|
@@ -10,7 +10,7 @@
|
|
10
10
|
|
11
11
|
#include "duckdb/common/field_writer.hpp"
|
12
12
|
#include "duckdb/common/serializer.hpp"
|
13
|
-
#include "duckdb/common/serializer/enum_serializer.hpp"
|
13
|
+
#include "duckdb/common/enum_util.hpp"
|
14
14
|
#include "duckdb/common/serializer/serialization_traits.hpp"
|
15
15
|
#include "duckdb/common/types/interval.hpp"
|
16
16
|
#include "duckdb/common/types/string_type.hpp"
|
@@ -39,7 +39,7 @@ public:
|
|
39
39
|
SetTag(tag);
|
40
40
|
if (serialize_enum_as_string) {
|
41
41
|
// Use the enum serializer to lookup tostring function
|
42
|
-
auto str =
|
42
|
+
auto str = EnumUtil::ToChars(value);
|
43
43
|
WriteValue(str);
|
44
44
|
} else {
|
45
45
|
// Use the underlying type
|
@@ -229,7 +229,7 @@ private:
|
|
229
229
|
//! The number of entries stored in the TupleDataCollection
|
230
230
|
idx_t count;
|
231
231
|
//! The data segments of the TupleDataCollection
|
232
|
-
|
232
|
+
unsafe_vector<TupleDataSegment> segments;
|
233
233
|
//! The set of scatter functions
|
234
234
|
vector<TupleDataScatterFunction> scatter_functions;
|
235
235
|
//! The set of gather functions
|
@@ -109,7 +109,7 @@ public:
|
|
109
109
|
//! The allocator for this segment
|
110
110
|
shared_ptr<TupleDataAllocator> allocator;
|
111
111
|
//! The chunks of this segment
|
112
|
-
|
112
|
+
unsafe_vector<TupleDataChunk> chunks;
|
113
113
|
//! The tuple count of this segment
|
114
114
|
idx_t count;
|
115
115
|
|
@@ -457,6 +457,7 @@ struct AggregateStateType {
|
|
457
457
|
DUCKDB_API static const aggregate_state_t &GetStateType(const LogicalType &type);
|
458
458
|
};
|
459
459
|
|
460
|
+
// **DEPRECATED**: Use EnumUtil directly instead.
|
460
461
|
DUCKDB_API string LogicalTypeIdToString(LogicalTypeId type);
|
461
462
|
|
462
463
|
DUCKDB_API LogicalTypeId TransformStringToLogicalTypeId(const string &str);
|
@@ -10,22 +10,31 @@
|
|
10
10
|
|
11
11
|
#include "duckdb/common/assert.hpp"
|
12
12
|
#include "duckdb/common/typedefs.hpp"
|
13
|
+
#include "duckdb/common/likely.hpp"
|
14
|
+
#include "duckdb/common/exception.hpp"
|
13
15
|
#include <vector>
|
14
16
|
|
15
17
|
namespace duckdb {
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
template <class _Tp, class _Allocator = std::allocator<_Tp>>
|
21
|
-
class vector : public std::vector<_Tp, _Allocator> {
|
19
|
+
template <class _Tp, bool SAFE = true>
|
20
|
+
class vector : public std::vector<_Tp, std::allocator<_Tp>> {
|
22
21
|
public:
|
23
|
-
using original = std::vector<_Tp, _Allocator>;
|
22
|
+
using original = std::vector<_Tp, std::allocator<_Tp>>;
|
24
23
|
using original::original;
|
25
24
|
using size_type = typename original::size_type;
|
26
25
|
using const_reference = typename original::const_reference;
|
27
26
|
using reference = typename original::reference;
|
28
27
|
|
28
|
+
static inline void AssertIndexInBounds(idx_t index, idx_t size) {
|
29
|
+
#if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
|
30
|
+
return;
|
31
|
+
#else
|
32
|
+
if (DUCKDB_UNLIKELY(index >= size)) {
|
33
|
+
throw InternalException("Attempted to access index %ld within vector of size %ld", index, size);
|
34
|
+
}
|
35
|
+
#endif
|
36
|
+
}
|
37
|
+
|
29
38
|
#ifdef DUCKDB_CLANG_TIDY
|
30
39
|
// This is necessary to tell clang-tidy that it reinitializes the variable after a move
|
31
40
|
[[clang::reinitializes]]
|
@@ -40,19 +49,57 @@ public:
|
|
40
49
|
vector() = default;
|
41
50
|
vector(original &&other) : original(std::move(other)) {
|
42
51
|
}
|
52
|
+
template <bool _SAFE>
|
53
|
+
vector(vector<_Tp, _SAFE> &&other) : original(std::move(other)) {
|
54
|
+
}
|
43
55
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
56
|
+
template <bool _SAFE = false>
|
57
|
+
inline typename original::reference get(typename original::size_type __n) {
|
58
|
+
if (_SAFE) {
|
59
|
+
AssertIndexInBounds(__n, original::size());
|
60
|
+
}
|
48
61
|
return original::operator[](__n);
|
49
62
|
}
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
63
|
+
|
64
|
+
template <bool _SAFE = false>
|
65
|
+
inline typename original::const_reference get(typename original::size_type __n) const {
|
66
|
+
if (_SAFE) {
|
67
|
+
AssertIndexInBounds(__n, original::size());
|
68
|
+
}
|
54
69
|
return original::operator[](__n);
|
55
70
|
}
|
71
|
+
|
72
|
+
typename original::reference operator[](typename original::size_type __n) {
|
73
|
+
return get<SAFE>(__n);
|
74
|
+
}
|
75
|
+
typename original::const_reference operator[](typename original::size_type __n) const {
|
76
|
+
return get<SAFE>(__n);
|
77
|
+
}
|
78
|
+
|
79
|
+
typename original::reference front() {
|
80
|
+
return get<SAFE>(0);
|
81
|
+
}
|
82
|
+
|
83
|
+
typename original::const_reference front() const {
|
84
|
+
return get<SAFE>(0);
|
85
|
+
}
|
86
|
+
|
87
|
+
typename original::reference back() {
|
88
|
+
if (original::empty()) {
|
89
|
+
throw InternalException("'back' called on an empty vector!");
|
90
|
+
}
|
91
|
+
return get<SAFE>(original::size() - 1);
|
92
|
+
}
|
93
|
+
|
94
|
+
typename original::const_reference back() const {
|
95
|
+
if (original::empty()) {
|
96
|
+
throw InternalException("'back' called on an empty vector!");
|
97
|
+
}
|
98
|
+
return get<SAFE>(original::size() - 1);
|
99
|
+
}
|
56
100
|
};
|
57
101
|
|
102
|
+
template <typename T>
|
103
|
+
using unsafe_vector = vector<T, false>;
|
104
|
+
|
58
105
|
} // namespace duckdb
|
@@ -102,9 +102,10 @@ public:
|
|
102
102
|
//! Add the given data to the HT, computing the aggregates grouped by the
|
103
103
|
//! data in the group chunk. When resize = true, aggregates will not be
|
104
104
|
//! computed but instead just assigned.
|
105
|
-
idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
|
105
|
+
idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
|
106
|
+
const unsafe_vector<idx_t> &filter);
|
106
107
|
idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes, DataChunk &payload,
|
107
|
-
const
|
108
|
+
const unsafe_vector<idx_t> &filter);
|
108
109
|
idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload, AggregateType filter);
|
109
110
|
|
110
111
|
//! Scan the HT starting from the scan_position until the result and group
|
package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp
CHANGED
@@ -21,7 +21,7 @@ public:
|
|
21
21
|
|
22
22
|
public:
|
23
23
|
// The indices of the aggregates that are distinct
|
24
|
-
|
24
|
+
unsafe_vector<idx_t> indices;
|
25
25
|
// The amount of radix_tables that are occupied
|
26
26
|
idx_t table_count;
|
27
27
|
//! Occupied tables, not equal to indices if aggregates share input data
|
@@ -34,7 +34,7 @@ public:
|
|
34
34
|
|
35
35
|
public:
|
36
36
|
static unique_ptr<DistinctAggregateCollectionInfo> Create(vector<unique_ptr<Expression>> &aggregates);
|
37
|
-
const
|
37
|
+
const unsafe_vector<idx_t> &Indices() const;
|
38
38
|
bool AnyDistinct() const;
|
39
39
|
|
40
40
|
private:
|
package/src/duckdb/src/include/duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp
CHANGED
@@ -42,7 +42,7 @@ public:
|
|
42
42
|
const vector<vector<idx_t>> &GetGroupingFunctions() const;
|
43
43
|
|
44
44
|
void InitializeGroupby(vector<unique_ptr<Expression>> groups, vector<unique_ptr<Expression>> expressions,
|
45
|
-
vector<
|
45
|
+
vector<unsafe_vector<idx_t>> grouping_functions);
|
46
46
|
|
47
47
|
//! Initialize a GroupedAggregateData object for use with distinct aggregates
|
48
48
|
void InitializeDistinct(const unique_ptr<Expression> &aggregate, const vector<unique_ptr<Expression>> *groups_p);
|
@@ -50,7 +50,7 @@ public:
|
|
50
50
|
private:
|
51
51
|
void InitializeDistinctGroups(const vector<unique_ptr<Expression>> *groups);
|
52
52
|
void InitializeGroupbyGroups(vector<unique_ptr<Expression>> groups);
|
53
|
-
void SetGroupingFunctions(vector<
|
53
|
+
void SetGroupingFunctions(vector<unsafe_vector<idx_t>> &functions);
|
54
54
|
};
|
55
55
|
|
56
56
|
} // namespace duckdb
|
package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp
CHANGED
@@ -67,7 +67,7 @@ public:
|
|
67
67
|
vector<unique_ptr<Expression>> groups, idx_t estimated_cardinality);
|
68
68
|
PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types, vector<unique_ptr<Expression>> expressions,
|
69
69
|
vector<unique_ptr<Expression>> groups, vector<GroupingSet> grouping_sets,
|
70
|
-
vector<
|
70
|
+
vector<unsafe_vector<idx_t>> grouping_functions, idx_t estimated_cardinality);
|
71
71
|
|
72
72
|
//! The grouping sets
|
73
73
|
GroupedAggregateData grouped_aggregate_data;
|
@@ -80,8 +80,8 @@ public:
|
|
80
80
|
vector<LogicalType> input_group_types;
|
81
81
|
|
82
82
|
// Filters given to Sink and friends
|
83
|
-
|
84
|
-
|
83
|
+
unsafe_vector<idx_t> non_distinct_filter;
|
84
|
+
unsafe_vector<idx_t> distinct_filter;
|
85
85
|
|
86
86
|
unordered_map<Expression *, size_t> filter_indexes;
|
87
87
|
|
package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/execution/physical_operator.hpp"
|
12
|
+
#include "duckdb/parser/parsed_data/copy_info.hpp"
|
13
|
+
#include "duckdb/function/copy_function.hpp"
|
14
|
+
#include "duckdb/common/file_system.hpp"
|
15
|
+
#include "duckdb/common/filename_pattern.hpp"
|
16
|
+
|
17
|
+
namespace duckdb {
|
18
|
+
|
19
|
+
//! Copy the contents of a query into a file
|
20
|
+
class PhysicalBatchCopyToFile : public PhysicalOperator {
|
21
|
+
public:
|
22
|
+
static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::BATCH_COPY_TO_FILE;
|
23
|
+
|
24
|
+
public:
|
25
|
+
PhysicalBatchCopyToFile(vector<LogicalType> types, CopyFunction function, unique_ptr<FunctionData> bind_data,
|
26
|
+
idx_t estimated_cardinality);
|
27
|
+
|
28
|
+
CopyFunction function;
|
29
|
+
unique_ptr<FunctionData> bind_data;
|
30
|
+
string file_path;
|
31
|
+
bool use_tmp_file;
|
32
|
+
|
33
|
+
public:
|
34
|
+
// Source interface
|
35
|
+
SourceResultType GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const override;
|
36
|
+
|
37
|
+
bool IsSource() const override {
|
38
|
+
return true;
|
39
|
+
}
|
40
|
+
|
41
|
+
public:
|
42
|
+
// Sink interface
|
43
|
+
SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override;
|
44
|
+
void Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const override;
|
45
|
+
SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
46
|
+
GlobalSinkState &gstate) const override;
|
47
|
+
unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
|
48
|
+
unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
|
49
|
+
void NextBatch(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p) const override;
|
50
|
+
|
51
|
+
bool RequiresBatchIndex() const override {
|
52
|
+
return true;
|
53
|
+
}
|
54
|
+
|
55
|
+
bool IsSink() const override {
|
56
|
+
return true;
|
57
|
+
}
|
58
|
+
|
59
|
+
bool ParallelSink() const override {
|
60
|
+
return true;
|
61
|
+
}
|
62
|
+
|
63
|
+
private:
|
64
|
+
void PrepareBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t batch_index,
|
65
|
+
unique_ptr<ColumnDataCollection> collection) const;
|
66
|
+
void FlushBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t min_index) const;
|
67
|
+
};
|
68
|
+
} // namespace duckdb
|