duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -8,9 +8,9 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
+
#include "duckdb/common/enum_util.hpp"
|
11
12
|
#include "duckdb/common/field_writer.hpp"
|
12
13
|
#include "duckdb/common/serializer.hpp"
|
13
|
-
#include "duckdb/common/enum_util.hpp"
|
14
14
|
#include "duckdb/common/serializer/serialization_traits.hpp"
|
15
15
|
#include "duckdb/common/types/interval.hpp"
|
16
16
|
#include "duckdb/common/types/string_type.hpp"
|
@@ -20,47 +20,79 @@
|
|
20
20
|
namespace duckdb {
|
21
21
|
|
22
22
|
class FormatSerializer {
|
23
|
-
friend Vector;
|
24
|
-
|
25
23
|
protected:
|
26
24
|
bool serialize_enum_as_string = false;
|
25
|
+
bool serialize_default_values = false;
|
26
|
+
|
27
|
+
public:
|
28
|
+
class List {
|
29
|
+
friend FormatSerializer;
|
30
|
+
|
31
|
+
private:
|
32
|
+
FormatSerializer &serializer;
|
33
|
+
explicit List(FormatSerializer &serializer) : serializer(serializer) {
|
34
|
+
}
|
35
|
+
|
36
|
+
public:
|
37
|
+
// Serialize an element
|
38
|
+
template <class T>
|
39
|
+
void WriteElement(const T &value);
|
40
|
+
|
41
|
+
// Serialize an object
|
42
|
+
template <class FUNC>
|
43
|
+
void WriteObject(FUNC f);
|
44
|
+
};
|
27
45
|
|
28
46
|
public:
|
29
47
|
// Serialize a value
|
30
48
|
template <class T>
|
31
49
|
void WriteProperty(const field_id_t field_id, const char *tag, const T &value) {
|
32
|
-
|
50
|
+
OnPropertyBegin(field_id, tag);
|
33
51
|
WriteValue(value);
|
52
|
+
OnPropertyEnd();
|
34
53
|
}
|
35
54
|
|
36
|
-
//
|
37
|
-
template <class
|
38
|
-
void
|
39
|
-
|
40
|
-
if (
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
OnOptionalBegin(true);
|
45
|
-
WriteValue(*ptr);
|
46
|
-
OnOptionalEnd(true);
|
55
|
+
// Default value
|
56
|
+
template <class T>
|
57
|
+
void WritePropertyWithDefault(const field_id_t field_id, const char *tag, const T &value, const T &&default_value) {
|
58
|
+
// If current value is default, don't write it
|
59
|
+
if (!serialize_default_values && (value == default_value)) {
|
60
|
+
OnOptionalPropertyBegin(field_id, tag, false);
|
61
|
+
OnOptionalPropertyEnd(false);
|
62
|
+
return;
|
47
63
|
}
|
64
|
+
OnOptionalPropertyBegin(field_id, tag, true);
|
65
|
+
WriteValue(value);
|
66
|
+
OnOptionalPropertyEnd(true);
|
48
67
|
}
|
49
68
|
|
50
69
|
// Special case: data_ptr_T
|
51
70
|
void WriteProperty(const field_id_t field_id, const char *tag, const_data_ptr_t ptr, idx_t count) {
|
52
|
-
|
71
|
+
OnPropertyBegin(field_id, tag);
|
53
72
|
WriteDataPtr(ptr, count);
|
73
|
+
OnPropertyEnd();
|
54
74
|
}
|
55
75
|
|
56
|
-
// Manually begin an object
|
57
|
-
|
58
|
-
|
76
|
+
// Manually begin an object
|
77
|
+
template <class FUNC>
|
78
|
+
void WriteObject(const field_id_t field_id, const char *tag, FUNC f) {
|
79
|
+
OnPropertyBegin(field_id, tag);
|
59
80
|
OnObjectBegin();
|
81
|
+
f(*this);
|
82
|
+
OnObjectEnd();
|
83
|
+
OnPropertyEnd();
|
60
84
|
}
|
61
85
|
|
62
|
-
|
63
|
-
|
86
|
+
template <class FUNC>
|
87
|
+
void WriteList(const field_id_t field_id, const char *tag, idx_t count, FUNC func) {
|
88
|
+
OnPropertyBegin(field_id, tag);
|
89
|
+
OnListBegin(count);
|
90
|
+
List list {*this};
|
91
|
+
for (idx_t i = 0; i < count; i++) {
|
92
|
+
func(list, i);
|
93
|
+
}
|
94
|
+
OnListEnd();
|
95
|
+
OnPropertyEnd();
|
64
96
|
}
|
65
97
|
|
66
98
|
protected:
|
@@ -82,27 +114,38 @@ protected:
|
|
82
114
|
WriteValue(ptr.get());
|
83
115
|
}
|
84
116
|
|
117
|
+
// Shared Pointer Ref
|
118
|
+
template <typename T>
|
119
|
+
void WriteValue(const shared_ptr<T> &ptr) {
|
120
|
+
WriteValue(ptr.get());
|
121
|
+
}
|
122
|
+
|
85
123
|
// Pointer
|
86
124
|
template <typename T>
|
87
|
-
|
125
|
+
void WriteValue(const T *ptr) {
|
88
126
|
if (ptr == nullptr) {
|
89
|
-
|
127
|
+
OnNullableBegin(false);
|
128
|
+
OnNullableEnd();
|
90
129
|
} else {
|
130
|
+
OnNullableBegin(true);
|
91
131
|
WriteValue(*ptr);
|
132
|
+
OnNullableEnd();
|
92
133
|
}
|
93
134
|
}
|
94
135
|
|
95
136
|
// Pair
|
96
137
|
template <class K, class V>
|
97
138
|
void WriteValue(const std::pair<K, V> &pair) {
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
139
|
+
OnObjectBegin();
|
140
|
+
WriteProperty(0, "first", pair.first);
|
141
|
+
WriteProperty(1, "second", pair.second);
|
142
|
+
OnObjectEnd();
|
143
|
+
}
|
144
|
+
|
145
|
+
// Reference Wrapper
|
146
|
+
template <class T>
|
147
|
+
void WriteValue(const reference<T> ref) {
|
148
|
+
WriteValue(ref.get());
|
106
149
|
}
|
107
150
|
|
108
151
|
// Vector
|
@@ -113,7 +156,7 @@ protected:
|
|
113
156
|
for (auto &item : vec) {
|
114
157
|
WriteValue(item);
|
115
158
|
}
|
116
|
-
OnListEnd(
|
159
|
+
OnListEnd();
|
117
160
|
}
|
118
161
|
|
119
162
|
template <class T>
|
@@ -123,7 +166,7 @@ protected:
|
|
123
166
|
for (auto &item : vec) {
|
124
167
|
WriteValue(item);
|
125
168
|
}
|
126
|
-
OnListEnd(
|
169
|
+
OnListEnd();
|
127
170
|
}
|
128
171
|
|
129
172
|
// UnorderedSet
|
@@ -135,7 +178,7 @@ protected:
|
|
135
178
|
for (auto &item : set) {
|
136
179
|
WriteValue(item);
|
137
180
|
}
|
138
|
-
OnListEnd(
|
181
|
+
OnListEnd();
|
139
182
|
}
|
140
183
|
|
141
184
|
// Set
|
@@ -147,108 +190,65 @@ protected:
|
|
147
190
|
for (auto &item : set) {
|
148
191
|
WriteValue(item);
|
149
192
|
}
|
150
|
-
OnListEnd(
|
193
|
+
OnListEnd();
|
151
194
|
}
|
152
195
|
|
153
196
|
// Map
|
197
|
+
// serialized as a list of pairs
|
154
198
|
template <class K, class V, class HASH, class CMP>
|
155
199
|
void WriteValue(const duckdb::unordered_map<K, V, HASH, CMP> &map) {
|
156
200
|
auto count = map.size();
|
157
|
-
|
201
|
+
OnListBegin(count);
|
158
202
|
for (auto &item : map) {
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
OnMapValueBegin();
|
164
|
-
WriteValue(item.second);
|
165
|
-
OnMapValueEnd();
|
166
|
-
OnMapEntryEnd();
|
203
|
+
OnObjectBegin();
|
204
|
+
WriteProperty(0, "key", item.first);
|
205
|
+
WriteProperty(1, "value", item.second);
|
206
|
+
OnObjectEnd();
|
167
207
|
}
|
168
|
-
|
208
|
+
OnListEnd();
|
169
209
|
}
|
170
210
|
|
171
211
|
// Map
|
212
|
+
// serialized as a list of pairs
|
172
213
|
template <class K, class V, class HASH, class CMP>
|
173
214
|
void WriteValue(const duckdb::map<K, V, HASH, CMP> &map) {
|
174
215
|
auto count = map.size();
|
175
|
-
|
216
|
+
OnListBegin(count);
|
176
217
|
for (auto &item : map) {
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
OnMapValueBegin();
|
182
|
-
WriteValue(item.second);
|
183
|
-
OnMapValueEnd();
|
184
|
-
OnMapEntryEnd();
|
218
|
+
OnObjectBegin();
|
219
|
+
WriteProperty(0, "key", item.first);
|
220
|
+
WriteProperty(1, "value", item.second);
|
221
|
+
OnObjectEnd();
|
185
222
|
}
|
186
|
-
|
223
|
+
OnListEnd();
|
187
224
|
}
|
188
225
|
|
189
226
|
// class or struct implementing `FormatSerialize(FormatSerializer& FormatSerializer)`;
|
190
227
|
template <typename T>
|
191
228
|
typename std::enable_if<has_serialize<T>::value>::type WriteValue(const T &value) {
|
192
|
-
// Else, we defer to the .FormatSerialize method
|
193
229
|
OnObjectBegin();
|
194
230
|
value.FormatSerialize(*this);
|
195
231
|
OnObjectEnd();
|
196
232
|
}
|
197
233
|
|
198
|
-
|
199
|
-
virtual void SetTag(const field_id_t field_id, const char *tag) {
|
200
|
-
(void)field_id;
|
201
|
-
(void)tag;
|
202
|
-
}
|
203
|
-
|
234
|
+
protected:
|
204
235
|
// Hooks for subclasses to override to implement custom behavior
|
205
|
-
virtual void
|
206
|
-
|
207
|
-
|
208
|
-
virtual void
|
209
|
-
|
210
|
-
|
211
|
-
virtual void
|
212
|
-
|
213
|
-
|
214
|
-
virtual void
|
215
|
-
(void)count;
|
216
|
-
}
|
217
|
-
virtual void OnMapEntryBegin() {
|
218
|
-
}
|
219
|
-
virtual void OnMapEntryEnd() {
|
220
|
-
}
|
221
|
-
virtual void OnMapKeyBegin() {
|
222
|
-
}
|
223
|
-
virtual void OnMapKeyEnd() {
|
224
|
-
}
|
225
|
-
virtual void OnMapValueBegin() {
|
226
|
-
}
|
227
|
-
virtual void OnMapValueEnd() {
|
228
|
-
}
|
229
|
-
virtual void OnOptionalBegin(bool present) {
|
230
|
-
}
|
231
|
-
virtual void OnOptionalEnd(bool present) {
|
232
|
-
}
|
233
|
-
virtual void OnObjectBegin() {
|
234
|
-
}
|
235
|
-
virtual void OnObjectEnd() {
|
236
|
-
}
|
237
|
-
virtual void OnPairBegin() {
|
238
|
-
}
|
239
|
-
virtual void OnPairKeyBegin() {
|
240
|
-
}
|
241
|
-
virtual void OnPairKeyEnd() {
|
242
|
-
}
|
243
|
-
virtual void OnPairValueBegin() {
|
244
|
-
}
|
245
|
-
virtual void OnPairValueEnd() {
|
246
|
-
}
|
247
|
-
virtual void OnPairEnd() {
|
248
|
-
}
|
236
|
+
virtual void OnPropertyBegin(const field_id_t field_id, const char *tag) = 0;
|
237
|
+
virtual void OnPropertyEnd() = 0;
|
238
|
+
virtual void OnOptionalPropertyBegin(const field_id_t field_id, const char *tag, bool present) = 0;
|
239
|
+
virtual void OnOptionalPropertyEnd(bool present) = 0;
|
240
|
+
virtual void OnObjectBegin() = 0;
|
241
|
+
virtual void OnObjectEnd() = 0;
|
242
|
+
virtual void OnListBegin(idx_t count) = 0;
|
243
|
+
virtual void OnListEnd() = 0;
|
244
|
+
virtual void OnNullableBegin(bool present) = 0;
|
245
|
+
virtual void OnNullableEnd() = 0;
|
249
246
|
|
250
247
|
// Handle primitive types, a serializer needs to implement these.
|
251
248
|
virtual void WriteNull() = 0;
|
249
|
+
virtual void WriteValue(char value) {
|
250
|
+
throw NotImplementedException("Write char value not implemented");
|
251
|
+
}
|
252
252
|
virtual void WriteValue(bool value) = 0;
|
253
253
|
virtual void WriteValue(uint8_t value) = 0;
|
254
254
|
virtual void WriteValue(int8_t value) = 0;
|
@@ -264,7 +264,6 @@ protected:
|
|
264
264
|
virtual void WriteValue(const string_t value) = 0;
|
265
265
|
virtual void WriteValue(const string &value) = 0;
|
266
266
|
virtual void WriteValue(const char *str) = 0;
|
267
|
-
virtual void WriteValue(interval_t value) = 0;
|
268
267
|
virtual void WriteDataPtr(const_data_ptr_t ptr, idx_t count) = 0;
|
269
268
|
void WriteValue(LogicalIndex value) {
|
270
269
|
WriteValue(value.index);
|
@@ -278,4 +277,17 @@ protected:
|
|
278
277
|
template <>
|
279
278
|
void FormatSerializer::WriteValue(const vector<bool> &vec);
|
280
279
|
|
280
|
+
// List Impl
|
281
|
+
template <class FUNC>
|
282
|
+
void FormatSerializer::List::WriteObject(FUNC f) {
|
283
|
+
serializer.OnObjectBegin();
|
284
|
+
f(serializer);
|
285
|
+
serializer.OnObjectEnd();
|
286
|
+
}
|
287
|
+
|
288
|
+
template <class T>
|
289
|
+
void FormatSerializer::List::WriteElement(const T &value) {
|
290
|
+
serializer.WriteValue(value);
|
291
|
+
}
|
292
|
+
|
281
293
|
} // namespace duckdb
|
@@ -12,7 +12,8 @@ namespace duckdb {
|
|
12
12
|
class FormatSerializer; // Forward declare
|
13
13
|
class FormatDeserializer; // Forward declare
|
14
14
|
|
15
|
-
typedef
|
15
|
+
typedef uint16_t field_id_t;
|
16
|
+
const field_id_t MESSAGE_TERMINATOR_FIELD_ID = 0xFFFF;
|
16
17
|
|
17
18
|
// Backport to c++11
|
18
19
|
template <class...>
|
@@ -1,3 +1,11 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/common/shared_ptr.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
1
9
|
#pragma once
|
2
10
|
|
3
11
|
#include <memory>
|
@@ -31,6 +31,7 @@ public:
|
|
31
31
|
|
32
32
|
GlobalSortStatePtr global_sort;
|
33
33
|
atomic<idx_t> count;
|
34
|
+
idx_t batch_base;
|
34
35
|
|
35
36
|
// Mask computation
|
36
37
|
SortLayout partition_layout;
|
@@ -59,9 +60,6 @@ public:
|
|
59
60
|
void UpdateLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
|
60
61
|
void CombineLocalPartition(GroupingPartition &local_partition, GroupingAppend &local_append);
|
61
62
|
|
62
|
-
void BuildSortState(TupleDataCollection &group_data, GlobalSortState &global_sort) const;
|
63
|
-
void BuildSortState(TupleDataCollection &group_data, PartitionGlobalHashGroup &global_sort);
|
64
|
-
|
65
63
|
ClientContext &context;
|
66
64
|
BufferManager &buffer_manager;
|
67
65
|
Allocator &allocator;
|
@@ -128,7 +126,7 @@ public:
|
|
128
126
|
void Combine();
|
129
127
|
};
|
130
128
|
|
131
|
-
enum class PartitionSortStage : uint8_t { INIT, PREPARE, MERGE, SORTED };
|
129
|
+
enum class PartitionSortStage : uint8_t { INIT, SCAN, PREPARE, MERGE, SORTED };
|
132
130
|
|
133
131
|
class PartitionLocalMergeState;
|
134
132
|
|
@@ -150,7 +148,11 @@ public:
|
|
150
148
|
PartitionGlobalSinkState &sink;
|
151
149
|
GroupDataPtr group_data;
|
152
150
|
PartitionGlobalHashGroup *hash_group;
|
151
|
+
vector<column_t> column_ids;
|
152
|
+
TupleDataParallelScanState chunk_state;
|
153
153
|
GlobalSortState *global_sort;
|
154
|
+
const idx_t memory_per_thread;
|
155
|
+
const idx_t num_threads;
|
154
156
|
|
155
157
|
private:
|
156
158
|
mutable mutex lock;
|
@@ -162,15 +164,14 @@ private:
|
|
162
164
|
|
163
165
|
class PartitionLocalMergeState {
|
164
166
|
public:
|
165
|
-
PartitionLocalMergeState(
|
166
|
-
finished = true;
|
167
|
-
}
|
167
|
+
explicit PartitionLocalMergeState(PartitionGlobalSinkState &gstate);
|
168
168
|
|
169
169
|
bool TaskFinished() {
|
170
170
|
return finished;
|
171
171
|
}
|
172
172
|
|
173
173
|
void Prepare();
|
174
|
+
void Scan();
|
174
175
|
void Merge();
|
175
176
|
|
176
177
|
void ExecuteTask();
|
@@ -178,6 +179,11 @@ public:
|
|
178
179
|
PartitionGlobalMergeState *merge_state;
|
179
180
|
PartitionSortStage stage;
|
180
181
|
atomic<bool> finished;
|
182
|
+
|
183
|
+
// Sorting buffers
|
184
|
+
ExpressionExecutor executor;
|
185
|
+
DataChunk sort_chunk;
|
186
|
+
DataChunk payload_chunk;
|
181
187
|
};
|
182
188
|
|
183
189
|
class PartitionGlobalMergeStates {
|
@@ -19,6 +19,8 @@ class Allocator;
|
|
19
19
|
class ClientContext;
|
20
20
|
class ExecutionContext;
|
21
21
|
class VectorCache;
|
22
|
+
class FormatSerializer;
|
23
|
+
class FormatDeserializer;
|
22
24
|
|
23
25
|
//! A Data Chunk represents a set of vectors.
|
24
26
|
/*!
|
@@ -141,6 +143,9 @@ public:
|
|
141
143
|
//! Deserializes a blob back into a DataChunk
|
142
144
|
DUCKDB_API void Deserialize(Deserializer &source);
|
143
145
|
|
146
|
+
DUCKDB_API void FormatSerialize(FormatSerializer &serializer) const;
|
147
|
+
DUCKDB_API void FormatDeserialize(FormatDeserializer &source);
|
148
|
+
|
144
149
|
//! Hashes the DataChunk to the target vector
|
145
150
|
DUCKDB_API void Hash(Vector &result);
|
146
151
|
//! Hashes specific vectors of the DataChunk to the target vector
|
@@ -18,11 +18,14 @@ struct robj;
|
|
18
18
|
|
19
19
|
namespace duckdb {
|
20
20
|
|
21
|
-
enum class HLLStorageType { UNCOMPRESSED = 1 };
|
21
|
+
enum class HLLStorageType : uint8_t { UNCOMPRESSED = 1 };
|
22
22
|
|
23
23
|
class FieldWriter;
|
24
24
|
class FieldReader;
|
25
25
|
|
26
|
+
class FormatSerializer;
|
27
|
+
class FormatDeserializer;
|
28
|
+
|
26
29
|
//! The HyperLogLog class holds a HyperLogLog counter for approximate cardinality counting
|
27
30
|
class HyperLogLog {
|
28
31
|
public:
|
@@ -50,6 +53,9 @@ public:
|
|
50
53
|
void Serialize(FieldWriter &writer) const;
|
51
54
|
static unique_ptr<HyperLogLog> Deserialize(FieldReader &reader);
|
52
55
|
|
56
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
57
|
+
static unique_ptr<HyperLogLog> FormatDeserialize(FormatDeserializer &deserializer);
|
58
|
+
|
53
59
|
public:
|
54
60
|
//! Compute HLL hashes over vdata, and store them in 'hashes'
|
55
61
|
//! Then, compute register indices and prefix lengths, and also store them in 'hashes' as a pair of uint32_t
|
@@ -16,6 +16,9 @@ struct dtime_t;
|
|
16
16
|
struct date_t;
|
17
17
|
struct timestamp_t;
|
18
18
|
|
19
|
+
class FormatSerializer;
|
20
|
+
class FormatDeserializer;
|
21
|
+
|
19
22
|
struct interval_t {
|
20
23
|
int32_t months;
|
21
24
|
int32_t days;
|
@@ -24,6 +27,10 @@ struct interval_t {
|
|
24
27
|
inline bool operator==(const interval_t &rhs) const {
|
25
28
|
return this->days == rhs.days && this->months == rhs.months && this->micros == rhs.micros;
|
26
29
|
}
|
30
|
+
|
31
|
+
// Serialization
|
32
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
33
|
+
static interval_t FormatDeserialize(FormatDeserializer &source);
|
27
34
|
};
|
28
35
|
|
29
36
|
//! The Interval class is a static class that holds helper functions for the Interval
|
@@ -8,6 +8,7 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
+
#include "duckdb/common/fixed_size_map.hpp"
|
11
12
|
#include "duckdb/common/perfect_map_set.hpp"
|
12
13
|
#include "duckdb/common/types/row/tuple_data_allocator.hpp"
|
13
14
|
#include "duckdb/common/types/row/tuple_data_collection.hpp"
|
@@ -23,10 +24,11 @@ public:
|
|
23
24
|
public:
|
24
25
|
Vector partition_indices;
|
25
26
|
SelectionVector partition_sel;
|
27
|
+
SelectionVector reverse_partition_sel;
|
26
28
|
|
27
|
-
static constexpr idx_t MAP_THRESHOLD =
|
29
|
+
static constexpr idx_t MAP_THRESHOLD = 256;
|
28
30
|
perfect_map_t<list_entry_t> partition_entries;
|
29
|
-
list_entry_t
|
31
|
+
fixed_size_map_t<list_entry_t> fixed_partition_entries;
|
30
32
|
|
31
33
|
vector<unique_ptr<TupleDataPinState>> partition_pin_states;
|
32
34
|
TupleDataChunkState chunk_state;
|
@@ -51,30 +53,48 @@ public:
|
|
51
53
|
virtual ~PartitionedTupleData();
|
52
54
|
|
53
55
|
public:
|
56
|
+
//! Get the layout of this PartitionedTupleData
|
57
|
+
const TupleDataLayout &GetLayout() const;
|
54
58
|
//! Get the partitioning type of this PartitionedTupleData
|
55
59
|
PartitionedTupleDataType GetType() const;
|
56
60
|
//! Initializes a local state for parallel partitioning that can be merged into this PartitionedTupleData
|
57
61
|
void InitializeAppendState(PartitionedTupleDataAppendState &state,
|
58
62
|
TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE) const;
|
59
63
|
//! Appends a DataChunk to this PartitionedTupleData
|
60
|
-
void Append(PartitionedTupleDataAppendState &state, DataChunk &input
|
64
|
+
void Append(PartitionedTupleDataAppendState &state, DataChunk &input,
|
65
|
+
const SelectionVector &append_sel = *FlatVector::IncrementalSelectionVector(),
|
66
|
+
const idx_t append_count = DConstants::INVALID_INDEX);
|
67
|
+
//! Appends a DataChunk to this PartitionedTupleData
|
68
|
+
//! - ToUnifiedFormat has already been called
|
69
|
+
void AppendUnified(PartitionedTupleDataAppendState &state, DataChunk &input,
|
70
|
+
const SelectionVector &append_sel = *FlatVector::IncrementalSelectionVector(),
|
71
|
+
const idx_t append_count = DConstants::INVALID_INDEX);
|
61
72
|
//! Appends rows to this PartitionedTupleData
|
62
|
-
void Append(PartitionedTupleDataAppendState &state, TupleDataChunkState &input, idx_t count);
|
73
|
+
void Append(PartitionedTupleDataAppendState &state, TupleDataChunkState &input, const idx_t count);
|
63
74
|
//! Flushes any remaining data in the append state into this PartitionedTupleData
|
64
75
|
void FlushAppendState(PartitionedTupleDataAppendState &state);
|
65
76
|
//! Combine another PartitionedTupleData into this PartitionedTupleData
|
66
77
|
void Combine(PartitionedTupleData &other);
|
67
|
-
//!
|
68
|
-
void
|
69
|
-
TupleDataPinProperties properties = TupleDataPinProperties::UNPIN_AFTER_DONE);
|
78
|
+
//! Resets this PartitionedTupleData
|
79
|
+
void Reset();
|
70
80
|
//! Repartition this PartitionedTupleData into the new PartitionedTupleData
|
71
81
|
void Repartition(PartitionedTupleData &new_partitioned_data);
|
82
|
+
//! Unpins the data
|
83
|
+
void Unpin();
|
72
84
|
//! Get the partitions in this PartitionedTupleData
|
73
85
|
vector<unique_ptr<TupleDataCollection>> &GetPartitions();
|
86
|
+
//! Get the data of this PartitionedTupleData as a single unpartitioned TupleDataCollection
|
87
|
+
unique_ptr<TupleDataCollection> GetUnpartitioned();
|
74
88
|
//! Get the count of this PartitionedTupleData
|
75
89
|
idx_t Count() const;
|
76
90
|
//! Get the size (in bytes) of this PartitionedTupleData
|
77
91
|
idx_t SizeInBytes() const;
|
92
|
+
//! Get the number of partitions of this PartitionedTupleData
|
93
|
+
idx_t PartitionCount() const;
|
94
|
+
//! Converts this PartitionedTupleData to a string representation
|
95
|
+
string ToString();
|
96
|
+
//! Prints the string representation of this PartitionedTupleData
|
97
|
+
void Print();
|
78
98
|
|
79
99
|
protected:
|
80
100
|
//===--------------------------------------------------------------------===//
|
@@ -91,7 +111,7 @@ protected:
|
|
91
111
|
throw NotImplementedException("ComputePartitionIndices for this type of PartitionedTupleData");
|
92
112
|
}
|
93
113
|
//! Compute partition indices from rows (similar to function above)
|
94
|
-
virtual void ComputePartitionIndices(Vector &row_locations, idx_t
|
114
|
+
virtual void ComputePartitionIndices(Vector &row_locations, idx_t append_count, Vector &partition_indices) const {
|
95
115
|
throw NotImplementedException("ComputePartitionIndices for this type of PartitionedTupleData");
|
96
116
|
}
|
97
117
|
//! Maximum partition index (optional)
|
@@ -116,11 +136,19 @@ protected:
|
|
116
136
|
|
117
137
|
//! Create a new shared allocator
|
118
138
|
void CreateAllocator();
|
139
|
+
//! Whether to use the fixed size map or the regular map
|
140
|
+
bool UseFixedSizeMap() const;
|
119
141
|
//! Builds a selection vector in the Append state for the partitions
|
120
142
|
//! - returns true if everything belongs to the same partition - stores partition index in single_partition_idx
|
121
|
-
void BuildPartitionSel(PartitionedTupleDataAppendState &state,
|
143
|
+
void BuildPartitionSel(PartitionedTupleDataAppendState &state, const SelectionVector &append_sel,
|
144
|
+
const idx_t append_count);
|
145
|
+
template <class MAP_TYPE, class GETTER>
|
146
|
+
void BuildPartitionSel(PartitionedTupleDataAppendState &state, MAP_TYPE &partition_entries,
|
147
|
+
const SelectionVector &append_sel, const idx_t append_count);
|
122
148
|
//! Builds out the buffer space in the partitions
|
123
149
|
void BuildBufferSpace(PartitionedTupleDataAppendState &state);
|
150
|
+
template <class MAP_TYPE, class GETTER>
|
151
|
+
void BuildBufferSpace(PartitionedTupleDataAppendState &state, const MAP_TYPE &partition_entries);
|
124
152
|
//! Create a collection for a specific partition
|
125
153
|
unique_ptr<TupleDataCollection> CreatePartitionCollection(idx_t partition_index) const {
|
126
154
|
if (allocators) {
|
@@ -129,11 +157,15 @@ protected:
|
|
129
157
|
return make_uniq<TupleDataCollection>(buffer_manager, layout);
|
130
158
|
}
|
131
159
|
}
|
160
|
+
//! Verify count/data size of this PartitionedTupleData
|
161
|
+
void Verify() const;
|
132
162
|
|
133
163
|
protected:
|
134
164
|
PartitionedTupleDataType type;
|
135
165
|
BufferManager &buffer_manager;
|
136
166
|
const TupleDataLayout layout;
|
167
|
+
idx_t count;
|
168
|
+
idx_t data_size;
|
137
169
|
|
138
170
|
mutex lock;
|
139
171
|
shared_ptr<PartitionTupleDataAllocators> allocators;
|
@@ -74,6 +74,11 @@ public:
|
|
74
74
|
return total_count - total_scanned;
|
75
75
|
}
|
76
76
|
|
77
|
+
//! The block index of the read state
|
78
|
+
inline idx_t BlockIndex() const {
|
79
|
+
return read_state.block_idx;
|
80
|
+
}
|
81
|
+
|
77
82
|
//! Swizzle the blocks for external scanning
|
78
83
|
//! Swizzling is all or nothing, so if we have scanned previously,
|
79
84
|
//! we need to re-swizzle.
|