duckdb 0.7.2-dev1034.0 → 0.7.2-dev1138.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
- package/src/duckdb/src/common/hive_partitioning.cpp +3 -1
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +7 -0
- package/src/duckdb/src/common/serializer/enum_serializer.cpp +6 -6
- package/src/duckdb/src/common/sort/comparators.cpp +14 -5
- package/src/duckdb/src/common/types/interval.cpp +0 -41
- package/src/duckdb/src/common/types/list_segment.cpp +658 -0
- package/src/duckdb/src/common/types/string_heap.cpp +1 -1
- package/src/duckdb/src/common/types/string_type.cpp +1 -1
- package/src/duckdb/src/common/types/vector.cpp +1 -1
- package/src/duckdb/src/common/value_operations/comparison_operations.cpp +14 -22
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +10 -10
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +11 -10
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
- package/src/duckdb/src/execution/index/art/art.cpp +13 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +1 -1
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +2 -0
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +1 -0
- package/src/duckdb/src/execution/operator/join/physical_join.cpp +0 -3
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +5 -1
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +18 -5
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +3 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +2 -1
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +1 -3
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +1 -0
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +1 -0
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -0
- package/src/duckdb/src/function/aggregate/nested/list.cpp +6 -712
- package/src/duckdb/src/function/scalar/list/list_sort.cpp +25 -18
- package/src/duckdb/src/function/table/read_csv.cpp +5 -0
- package/src/duckdb/src/function/table/table_scan.cpp +8 -11
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +45 -149
- package/src/duckdb/src/include/duckdb/common/progress_bar/progress_bar.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +39 -3
- package/src/duckdb/src/include/duckdb/common/types/list_segment.hpp +70 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +73 -3
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +1 -12
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_scan.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_scan.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +0 -2
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/index.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +18 -7
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -3
- package/src/duckdb/src/include/duckdb/storage/table/column_segment_tree.hpp +18 -0
- package/src/duckdb/src/include/duckdb/storage/table/persistent_table_data.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +35 -43
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +18 -5
- package/src/duckdb/src/include/duckdb/storage/table/row_group_segment_tree.hpp +2 -4
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +12 -29
- package/src/duckdb/src/include/duckdb/storage/table/segment_base.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +11 -1
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +0 -4
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +4 -1
- package/src/duckdb/src/include/duckdb.h +21 -0
- package/src/duckdb/src/main/capi/table_function-c.cpp +23 -0
- package/src/duckdb/src/main/settings/settings.cpp +20 -8
- package/src/duckdb/src/optimizer/filter_combiner.cpp +2 -5
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +2 -0
- package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +1 -0
- package/src/duckdb/src/parallel/meta_pipeline.cpp +0 -3
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +22 -0
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
- package/src/duckdb/src/storage/compression/bitpacking.cpp +1 -1
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +1 -1
- package/src/duckdb/src/storage/compression/rle.cpp +1 -0
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +1 -1
- package/src/duckdb/src/storage/data_table.cpp +3 -3
- package/src/duckdb/src/storage/local_storage.cpp +7 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/column_data.cpp +75 -18
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +3 -1
- package/src/duckdb/src/storage/table/column_segment.cpp +17 -31
- package/src/duckdb/src/storage/table/list_column_data.cpp +9 -12
- package/src/duckdb/src/storage/table/row_group.cpp +200 -136
- package/src/duckdb/src/storage/table/row_group_collection.cpp +75 -45
- package/src/duckdb/src/storage/table/scan_state.cpp +31 -38
- package/src/duckdb/src/storage/table/standard_column_data.cpp +4 -6
- package/src/duckdb/src/storage/table/struct_column_data.cpp +11 -18
- package/src/duckdb/src/storage/table/update_segment.cpp +3 -0
- package/src/duckdb/ub_src_common_types.cpp +2 -0
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
#include "duckdb/common/assert.hpp"
|
|
12
12
|
#include "duckdb/common/constants.hpp"
|
|
13
|
+
#include "duckdb/common/helper.hpp"
|
|
13
14
|
|
|
14
15
|
#include <cstring>
|
|
15
16
|
|
|
@@ -117,10 +118,79 @@ public:
|
|
|
117
118
|
|
|
118
119
|
void Verify() const;
|
|
119
120
|
void VerifyNull() const;
|
|
121
|
+
|
|
122
|
+
struct StringComparisonOperators {
|
|
123
|
+
static inline bool Equals(const string_t &a, const string_t &b) {
|
|
124
|
+
#ifdef DUCKDB_DEBUG_NO_INLINE
|
|
125
|
+
if (a.GetSize() != b.GetSize())
|
|
126
|
+
return false;
|
|
127
|
+
return (memcmp(a.GetDataUnsafe(), b.GetDataUnsafe(), a.GetSize()) == 0);
|
|
128
|
+
#endif
|
|
129
|
+
uint64_t A = Load<uint64_t>((const_data_ptr_t)&a);
|
|
130
|
+
uint64_t B = Load<uint64_t>((const_data_ptr_t)&b);
|
|
131
|
+
if (A != B) {
|
|
132
|
+
// Either length or prefix are different -> not equal
|
|
133
|
+
return false;
|
|
134
|
+
}
|
|
135
|
+
// they have the same length and same prefix!
|
|
136
|
+
A = Load<uint64_t>((const_data_ptr_t)&a + 8u);
|
|
137
|
+
B = Load<uint64_t>((const_data_ptr_t)&b + 8u);
|
|
138
|
+
if (A == B) {
|
|
139
|
+
// either they are both inlined (so compare equal) or point to the same string (so compare equal)
|
|
140
|
+
return true;
|
|
141
|
+
}
|
|
142
|
+
if (!a.IsInlined()) {
|
|
143
|
+
// 'long' strings of the same length -> compare pointed value
|
|
144
|
+
if (memcmp(a.value.pointer.ptr, b.value.pointer.ptr, a.GetSize()) == 0) {
|
|
145
|
+
return true;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
// either they are short strings of the same length but with different content
|
|
149
|
+
// or they point to strings with different content
|
|
150
|
+
// either way, they can't represent the same underlying string
|
|
151
|
+
return false;
|
|
152
|
+
}
|
|
153
|
+
// compare up to shared length. if still the same, compare lengths
|
|
154
|
+
static bool GreaterThan(const string_t &left, const string_t &right) {
|
|
155
|
+
const uint32_t left_length = left.GetSize();
|
|
156
|
+
const uint32_t right_length = right.GetSize();
|
|
157
|
+
const uint32_t min_length = std::min<uint32_t>(left_length, right_length);
|
|
158
|
+
|
|
159
|
+
#ifndef DUCKDB_DEBUG_NO_INLINE
|
|
160
|
+
uint32_t A = Load<uint32_t>((const_data_ptr_t)left.GetPrefix());
|
|
161
|
+
uint32_t B = Load<uint32_t>((const_data_ptr_t)right.GetPrefix());
|
|
162
|
+
|
|
163
|
+
// Utility to move 0xa1b2c3d4 into 0xd4c3b2a1, basically inverting the order byte-a-byte
|
|
164
|
+
auto bswap = [](uint32_t v) -> uint32_t {
|
|
165
|
+
uint32_t t1 = (v >> 16u) | (v << 16u);
|
|
166
|
+
uint32_t t2 = t1 & 0x00ff00ff;
|
|
167
|
+
uint32_t t3 = t1 & 0xff00ff00;
|
|
168
|
+
return (t2 << 8u) | (t3 >> 8u);
|
|
169
|
+
};
|
|
170
|
+
|
|
171
|
+
// Check on prefix -----
|
|
172
|
+
// We don't need to mask since:
|
|
173
|
+
// if the prefix is greater(after bswap), it will stay greater regardless of the extra bytes
|
|
174
|
+
// if the prefix is smaller(after bswap), it will stay smaller regardless of the extra bytes
|
|
175
|
+
// if the prefix is equal, the extra bytes are guaranteed to be \0 for the shorter one
|
|
176
|
+
|
|
177
|
+
if (A != B)
|
|
178
|
+
return bswap(A) > bswap(B);
|
|
179
|
+
#endif
|
|
180
|
+
auto memcmp_res = memcmp(left.GetDataUnsafe(), right.GetDataUnsafe(), min_length);
|
|
181
|
+
return memcmp_res > 0 || (memcmp_res == 0 && left_length > right_length);
|
|
182
|
+
}
|
|
183
|
+
};
|
|
184
|
+
|
|
185
|
+
bool operator==(const string_t &r) const {
|
|
186
|
+
return StringComparisonOperators::Equals(*this, r);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
bool operator>(const string_t &r) const {
|
|
190
|
+
return StringComparisonOperators::GreaterThan(*this, r);
|
|
191
|
+
}
|
|
120
192
|
bool operator<(const string_t &r) const {
|
|
121
|
-
|
|
122
|
-
auto r_str = r.GetString();
|
|
123
|
-
return this_str < r_str;
|
|
193
|
+
return r > *this;
|
|
124
194
|
}
|
|
125
195
|
|
|
126
196
|
private:
|
|
@@ -27,18 +27,7 @@
|
|
|
27
27
|
namespace duckdb {
|
|
28
28
|
|
|
29
29
|
class ConflictManager;
|
|
30
|
-
|
|
31
|
-
struct ARTIndexScanState : public IndexScanState {
|
|
32
|
-
|
|
33
|
-
//! Scan predicates (single predicate scan or range scan)
|
|
34
|
-
Value values[2];
|
|
35
|
-
//! Expressions of the scan predicates
|
|
36
|
-
ExpressionType expressions[2];
|
|
37
|
-
bool checked = false;
|
|
38
|
-
//! All scanned row IDs
|
|
39
|
-
vector<row_t> result_ids;
|
|
40
|
-
Iterator iterator;
|
|
41
|
-
};
|
|
30
|
+
struct ARTIndexScanState;
|
|
42
31
|
|
|
43
32
|
enum class VerifyExistenceType : uint8_t {
|
|
44
33
|
APPEND = 0, // appends to a table
|
|
@@ -89,6 +89,8 @@ struct BufferedCSVReaderOptions {
|
|
|
89
89
|
|
|
90
90
|
//! How many leading rows to skip
|
|
91
91
|
idx_t skip_rows = 0;
|
|
92
|
+
//! Whether or not the skip_rows is set by the user
|
|
93
|
+
bool skip_rows_set = false;
|
|
92
94
|
//! Maximum CSV line size: specified because if we reach this amount, we likely have wrong delimiters (default: 2MB)
|
|
93
95
|
//! note that this is the guaranteed line length that will succeed, longer lines may be accepted if slightly above
|
|
94
96
|
idx_t maximum_line_size = 2097152;
|
|
@@ -116,6 +118,8 @@ struct BufferedCSVReaderOptions {
|
|
|
116
118
|
idx_t buffer_size = CSVBuffer::INITIAL_BUFFER_SIZE_COLOSSAL;
|
|
117
119
|
//! Decimal separator when reading as numeric
|
|
118
120
|
string decimal_separator = ".";
|
|
121
|
+
//! Whether or not to pad rows that do not have enough columns with NULL values
|
|
122
|
+
bool null_padding = false; // opt-in: NULL padding of short rows stays off unless the option is explicitly enabled
|
|
119
123
|
|
|
120
124
|
//===--------------------------------------------------------------------===//
|
|
121
125
|
// WriteCSVOptions
|
|
@@ -40,6 +40,8 @@ struct ClientConfig {
|
|
|
40
40
|
//! to output anything
|
|
41
41
|
bool emit_profiler_output = true;
|
|
42
42
|
|
|
43
|
+
//! Reason the progress bar is disabled system-wide (defaults to nullptr)
|
|
44
|
+
const char *system_progress_bar_disable_reason = nullptr;
|
|
43
45
|
//! If the progress bar is enabled or not.
|
|
44
46
|
bool enable_progress_bar = false;
|
|
45
47
|
//! If the print of the progress bar is enabled
|
|
@@ -34,8 +34,6 @@ struct RowGroupPointer {
|
|
|
34
34
|
uint64_t tuple_count;
|
|
35
35
|
//! The data pointers of the column segments stored in the row group
|
|
36
36
|
vector<BlockPointer> data_pointers;
|
|
37
|
-
//! The per-column statistics of the row group
|
|
38
|
-
vector<BaseStatistics> statistics;
|
|
39
37
|
//! The versions information of the row group (if any)
|
|
40
38
|
shared_ptr<VersionNode> versions;
|
|
41
39
|
};
|
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
#include "duckdb/common/sort/sort.hpp"
|
|
15
15
|
#include "duckdb/parser/parsed_expression.hpp"
|
|
16
16
|
#include "duckdb/planner/expression.hpp"
|
|
17
|
-
#include "duckdb/storage/table/scan_state.hpp"
|
|
18
17
|
#include "duckdb/storage/meta_block_writer.hpp"
|
|
19
18
|
#include "duckdb/execution/expression_executor.hpp"
|
|
20
19
|
#include "duckdb/common/types/constraint_conflict_info.hpp"
|
|
@@ -27,6 +26,7 @@ class Transaction;
|
|
|
27
26
|
class ConflictManager;
|
|
28
27
|
|
|
29
28
|
struct IndexLock;
|
|
29
|
+
struct IndexScanState;
|
|
30
30
|
|
|
31
31
|
//! The index is an abstract base class that serves as the basis for indexes
|
|
32
32
|
class Index {
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#include "duckdb/storage/buffer_manager.hpp"
|
|
10
10
|
#include "duckdb/storage/checkpoint/string_checkpoint_state.hpp"
|
|
11
11
|
#include "duckdb/storage/segment/uncompressed.hpp"
|
|
12
|
-
|
|
12
|
+
#include "duckdb/storage/table/scan_state.hpp"
|
|
13
13
|
#include "duckdb/storage/string_uncompressed.hpp"
|
|
14
14
|
#include "duckdb/storage/table/append_state.hpp"
|
|
15
15
|
#include "duckdb/storage/table/column_segment.hpp"
|
|
@@ -9,14 +9,12 @@
|
|
|
9
9
|
#pragma once
|
|
10
10
|
|
|
11
11
|
#include "duckdb/common/types/data_chunk.hpp"
|
|
12
|
-
#include "duckdb/storage/table/append_state.hpp"
|
|
13
|
-
#include "duckdb/storage/table/scan_state.hpp"
|
|
14
12
|
#include "duckdb/storage/statistics/base_statistics.hpp"
|
|
15
13
|
#include "duckdb/storage/data_pointer.hpp"
|
|
16
14
|
#include "duckdb/storage/table/persistent_table_data.hpp"
|
|
17
15
|
#include "duckdb/storage/statistics/segment_statistics.hpp"
|
|
18
16
|
#include "duckdb/storage/table/segment_tree.hpp"
|
|
19
|
-
#include "duckdb/storage/table/
|
|
17
|
+
#include "duckdb/storage/table/column_segment_tree.hpp"
|
|
20
18
|
#include "duckdb/common/mutex.hpp"
|
|
21
19
|
|
|
22
20
|
namespace duckdb {
|
|
@@ -36,8 +34,6 @@ struct ColumnCheckpointInfo {
|
|
|
36
34
|
CompressionType compression_type;
|
|
37
35
|
};
|
|
38
36
|
|
|
39
|
-
class ColumnSegmentTree : public SegmentTree<ColumnSegment> {};
|
|
40
|
-
|
|
41
37
|
class ColumnData {
|
|
42
38
|
friend class ColumnDataCheckpointer;
|
|
43
39
|
|
|
@@ -47,14 +43,16 @@ public:
|
|
|
47
43
|
ColumnData(ColumnData &other, idx_t start, ColumnData *parent);
|
|
48
44
|
virtual ~ColumnData();
|
|
49
45
|
|
|
46
|
+
//! The start row
|
|
47
|
+
const idx_t start;
|
|
48
|
+
//! The count of the column data
|
|
49
|
+
idx_t count;
|
|
50
50
|
//! The block manager
|
|
51
51
|
BlockManager &block_manager;
|
|
52
52
|
//! Table info for the column
|
|
53
53
|
DataTableInfo &info;
|
|
54
54
|
//! The column index of the column, either within the parent table or within the parent
|
|
55
55
|
idx_t column_index;
|
|
56
|
-
//! The start row
|
|
57
|
-
idx_t start;
|
|
58
56
|
//! The type of the column
|
|
59
57
|
LogicalType type;
|
|
60
58
|
//! The parent column (if any)
|
|
@@ -63,6 +61,9 @@ public:
|
|
|
63
61
|
public:
|
|
64
62
|
virtual bool CheckZonemap(ColumnScanState &state, TableFilter &filter) = 0;
|
|
65
63
|
|
|
64
|
+
BlockManager &GetBlockManager() {
|
|
65
|
+
return block_manager;
|
|
66
|
+
}
|
|
66
67
|
DatabaseInstance &GetDatabase() const;
|
|
67
68
|
DataTableInfo &GetTableInfo() const;
|
|
68
69
|
virtual idx_t GetMaxEntry();
|
|
@@ -96,6 +97,8 @@ public:
|
|
|
96
97
|
virtual void InitializeAppend(ColumnAppendState &state);
|
|
97
98
|
//! Append a vector of type [type] to the end of the column
|
|
98
99
|
virtual void Append(BaseStatistics &stats, ColumnAppendState &state, Vector &vector, idx_t count);
|
|
100
|
+
//! Append a vector of type [type] to the end of the column
|
|
101
|
+
void Append(ColumnAppendState &state, Vector &vector, idx_t count);
|
|
99
102
|
virtual void AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata, idx_t count);
|
|
100
103
|
//! Revert a set of appends to the ColumnData
|
|
101
104
|
virtual void RevertAppend(row_t start_row);
|
|
@@ -130,6 +133,8 @@ public:
|
|
|
130
133
|
virtual void GetStorageInfo(idx_t row_group_index, vector<idx_t> col_path, TableStorageInfo &result);
|
|
131
134
|
virtual void Verify(RowGroup &parent);
|
|
132
135
|
|
|
136
|
+
bool CheckZonemap(TableFilter &filter);
|
|
137
|
+
|
|
133
138
|
static shared_ptr<ColumnData> CreateColumn(BlockManager &block_manager, DataTableInfo &info, idx_t column_index,
|
|
134
139
|
idx_t start_row, const LogicalType &type, ColumnData *parent = nullptr);
|
|
135
140
|
static shared_ptr<ColumnData> CreateColumn(ColumnData &other, idx_t start_row, ColumnData *parent = nullptr);
|
|
@@ -138,6 +143,10 @@ public:
|
|
|
138
143
|
ColumnData *parent = nullptr);
|
|
139
144
|
static unique_ptr<ColumnData> CreateColumnUnique(ColumnData &other, idx_t start_row, ColumnData *parent = nullptr);
|
|
140
145
|
|
|
146
|
+
void MergeStatistics(const BaseStatistics &other);
|
|
147
|
+
void MergeIntoStatistics(BaseStatistics &other);
|
|
148
|
+
unique_ptr<BaseStatistics> GetStatistics();
|
|
149
|
+
|
|
141
150
|
protected:
|
|
142
151
|
//! Append a transient segment
|
|
143
152
|
void AppendTransientSegment(SegmentLock &l, idx_t start_row);
|
|
@@ -158,6 +167,8 @@ protected:
|
|
|
158
167
|
unique_ptr<UpdateSegment> updates;
|
|
159
168
|
//! The internal version of the column data
|
|
160
169
|
idx_t version;
|
|
170
|
+
//! The stats of the root segment
|
|
171
|
+
unique_ptr<SegmentStatistics> stats;
|
|
161
172
|
};
|
|
162
173
|
|
|
163
174
|
} // namespace duckdb
|
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
#include "duckdb/storage/buffer_manager.hpp"
|
|
15
15
|
#include "duckdb/storage/statistics/segment_statistics.hpp"
|
|
16
16
|
#include "duckdb/storage/storage_lock.hpp"
|
|
17
|
-
#include "duckdb/storage/table/scan_state.hpp"
|
|
18
17
|
#include "duckdb/function/compression_function.hpp"
|
|
19
18
|
#include "duckdb/storage/table/segment_base.hpp"
|
|
20
19
|
|
|
@@ -39,8 +38,6 @@ class ColumnSegment : public SegmentBase<ColumnSegment> {
|
|
|
39
38
|
public:
|
|
40
39
|
~ColumnSegment();
|
|
41
40
|
|
|
42
|
-
//! The index within the segment tree
|
|
43
|
-
idx_t index;
|
|
44
41
|
//! The database instance
|
|
45
42
|
DatabaseInstance &db;
|
|
46
43
|
//! The type stored in the column
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
|
2
|
+
// DuckDB
|
|
3
|
+
//
|
|
4
|
+
// duckdb/storage/table/column_segment_tree.hpp
|
|
5
|
+
//
|
|
6
|
+
//
|
|
7
|
+
//===----------------------------------------------------------------------===//
|
|
8
|
+
|
|
9
|
+
#pragma once
|
|
10
|
+
|
|
11
|
+
#include "duckdb/storage/table/segment_tree.hpp"
|
|
12
|
+
#include "duckdb/storage/table/column_segment.hpp"
|
|
13
|
+
|
|
14
|
+
namespace duckdb {
|
|
15
|
+
|
|
16
|
+
class ColumnSegmentTree : public SegmentTree<ColumnSegment> {};
|
|
17
|
+
|
|
18
|
+
} // namespace duckdb
|
|
@@ -10,13 +10,13 @@
|
|
|
10
10
|
|
|
11
11
|
#include "duckdb/common/vector_size.hpp"
|
|
12
12
|
#include "duckdb/storage/table/chunk_info.hpp"
|
|
13
|
-
#include "duckdb/storage/table/append_state.hpp"
|
|
14
|
-
#include "duckdb/storage/table/scan_state.hpp"
|
|
15
13
|
#include "duckdb/storage/statistics/segment_statistics.hpp"
|
|
14
|
+
#include "duckdb/common/types/data_chunk.hpp"
|
|
16
15
|
#include "duckdb/common/enums/scan_options.hpp"
|
|
17
16
|
#include "duckdb/common/mutex.hpp"
|
|
18
17
|
#include "duckdb/parser/column_list.hpp"
|
|
19
18
|
#include "duckdb/storage/table/segment_base.hpp"
|
|
19
|
+
#include "duckdb/storage/block.hpp"
|
|
20
20
|
|
|
21
21
|
namespace duckdb {
|
|
22
22
|
class AttachedDatabase;
|
|
@@ -27,6 +27,7 @@ class DataTable;
|
|
|
27
27
|
class PartialBlockManager;
|
|
28
28
|
struct DataTableInfo;
|
|
29
29
|
class ExpressionExecutor;
|
|
30
|
+
class RowGroupCollection;
|
|
30
31
|
class RowGroupWriter;
|
|
31
32
|
class UpdateSegment;
|
|
32
33
|
class TableStatistics;
|
|
@@ -36,6 +37,10 @@ struct ColumnCheckpointState;
|
|
|
36
37
|
struct RowGroupPointer;
|
|
37
38
|
struct TransactionData;
|
|
38
39
|
struct VersionNode;
|
|
40
|
+
class CollectionScanState;
|
|
41
|
+
class TableFilterSet;
|
|
42
|
+
struct ColumnFetchState;
|
|
43
|
+
struct RowGroupAppendState;
|
|
39
44
|
|
|
40
45
|
struct RowGroupWriteData {
|
|
41
46
|
vector<unique_ptr<ColumnCheckpointState>> states;
|
|
@@ -52,51 +57,33 @@ public:
|
|
|
52
57
|
static constexpr const idx_t ROW_GROUP_VECTOR_COUNT = ROW_GROUP_SIZE / STANDARD_VECTOR_SIZE;
|
|
53
58
|
|
|
54
59
|
public:
|
|
55
|
-
RowGroup(
|
|
56
|
-
RowGroup(
|
|
57
|
-
|
|
58
|
-
RowGroup(RowGroup &row_group, idx_t start);
|
|
60
|
+
RowGroup(RowGroupCollection &collection, idx_t start, idx_t count);
|
|
61
|
+
RowGroup(RowGroupCollection &collection, RowGroupPointer &&pointer);
|
|
62
|
+
RowGroup(RowGroup &row_group, RowGroupCollection &collection, idx_t start);
|
|
59
63
|
~RowGroup();
|
|
60
64
|
|
|
61
|
-
//! The index within the segment tree
|
|
62
|
-
idx_t index;
|
|
63
|
-
|
|
64
65
|
private:
|
|
65
|
-
//! The
|
|
66
|
-
|
|
67
|
-
//! The block manager
|
|
68
|
-
BlockManager &block_manager;
|
|
69
|
-
//! The table info of this row_group
|
|
70
|
-
DataTableInfo &table_info;
|
|
66
|
+
//! The RowGroupCollection this row-group is a part of
|
|
67
|
+
RowGroupCollection &collection;
|
|
71
68
|
//! The version info of the row_group (inserted and deleted tuple info)
|
|
72
69
|
shared_ptr<VersionNode> version_info;
|
|
73
70
|
//! The column data of the row_group
|
|
74
71
|
vector<shared_ptr<ColumnData>> columns;
|
|
75
|
-
//! The segment statistics for each of the columns
|
|
76
|
-
vector<SegmentStatistics> stats;
|
|
77
72
|
|
|
78
73
|
public:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
return block_manager;
|
|
82
|
-
}
|
|
83
|
-
DataTableInfo &GetTableInfo() {
|
|
84
|
-
return table_info;
|
|
85
|
-
}
|
|
86
|
-
idx_t GetColumnIndex(ColumnData *data) {
|
|
87
|
-
for (idx_t i = 0; i < columns.size(); i++) {
|
|
88
|
-
if (columns[i].get() == data) {
|
|
89
|
-
return i;
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
return 0;
|
|
74
|
+
RowGroupCollection &GetCollection() {
|
|
75
|
+
return collection;
|
|
93
76
|
}
|
|
77
|
+
DatabaseInstance &GetDatabase();
|
|
78
|
+
BlockManager &GetBlockManager();
|
|
79
|
+
DataTableInfo &GetTableInfo();
|
|
94
80
|
|
|
95
|
-
unique_ptr<RowGroup> AlterType(const LogicalType &target_type, idx_t changed_idx,
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
81
|
+
unique_ptr<RowGroup> AlterType(RowGroupCollection &collection, const LogicalType &target_type, idx_t changed_idx,
|
|
82
|
+
ExpressionExecutor &executor, CollectionScanState &scan_state,
|
|
83
|
+
DataChunk &scan_chunk);
|
|
84
|
+
unique_ptr<RowGroup> AddColumn(RowGroupCollection &collection, ColumnDefinition &new_column,
|
|
85
|
+
ExpressionExecutor &executor, Expression *default_value, Vector &intermediate);
|
|
86
|
+
unique_ptr<RowGroup> RemoveColumn(RowGroupCollection &collection, idx_t removed_column);
|
|
100
87
|
|
|
101
88
|
void CommitDrop();
|
|
102
89
|
void CommitDropColumn(idx_t index);
|
|
@@ -104,16 +91,16 @@ public:
|
|
|
104
91
|
void InitializeEmpty(const vector<LogicalType> &types);
|
|
105
92
|
|
|
106
93
|
//! Initialize a scan over this row_group
|
|
107
|
-
bool InitializeScan(
|
|
108
|
-
bool InitializeScanWithOffset(
|
|
94
|
+
bool InitializeScan(CollectionScanState &state);
|
|
95
|
+
bool InitializeScanWithOffset(CollectionScanState &state, idx_t vector_offset);
|
|
109
96
|
//! Checks the given set of table filters against the row-group statistics. Returns false if the entire row group
|
|
110
97
|
//! can be skipped.
|
|
111
98
|
bool CheckZonemap(TableFilterSet &filters, const vector<column_t> &column_ids);
|
|
112
99
|
//! Checks the given set of table filters against the per-segment statistics. Returns false if any segments were
|
|
113
100
|
//! skipped.
|
|
114
|
-
bool CheckZonemapSegments(
|
|
115
|
-
void Scan(TransactionData transaction,
|
|
116
|
-
void ScanCommitted(
|
|
101
|
+
bool CheckZonemapSegments(CollectionScanState &state);
|
|
102
|
+
void Scan(TransactionData transaction, CollectionScanState &state, DataChunk &result);
|
|
103
|
+
void ScanCommitted(CollectionScanState &state, DataChunk &result, TableScanType type);
|
|
117
104
|
|
|
118
105
|
idx_t GetSelVector(TransactionData transaction, idx_t vector_idx, SelectionVector &sel_vector, idx_t max_count);
|
|
119
106
|
idx_t GetCommittedSelVector(transaction_t start_time, transaction_t transaction_id, idx_t vector_idx,
|
|
@@ -158,13 +145,16 @@ public:
|
|
|
158
145
|
|
|
159
146
|
void Verify();
|
|
160
147
|
|
|
161
|
-
void NextVector(
|
|
148
|
+
void NextVector(CollectionScanState &state);
|
|
162
149
|
|
|
163
150
|
private:
|
|
164
151
|
ChunkInfo *GetChunkInfo(idx_t vector_idx);
|
|
152
|
+
ColumnData &GetColumn(idx_t c);
|
|
153
|
+
idx_t GetColumnCount() const;
|
|
154
|
+
vector<shared_ptr<ColumnData>> &GetColumns();
|
|
165
155
|
|
|
166
156
|
template <TableScanType TYPE>
|
|
167
|
-
void TemplatedScan(TransactionData transaction,
|
|
157
|
+
void TemplatedScan(TransactionData transaction, CollectionScanState &state, DataChunk &result);
|
|
168
158
|
|
|
169
159
|
static void CheckpointDeletes(VersionNode *versions, Serializer &serializer);
|
|
170
160
|
static shared_ptr<VersionNode> DeserializeDeletes(Deserializer &source);
|
|
@@ -172,6 +162,8 @@ private:
|
|
|
172
162
|
private:
|
|
173
163
|
mutex row_group_lock;
|
|
174
164
|
mutex stats_lock;
|
|
165
|
+
vector<BlockPointer> column_pointers;
|
|
166
|
+
unique_ptr<atomic<bool>[]> is_loaded;
|
|
175
167
|
};
|
|
176
168
|
|
|
177
169
|
struct VersionNode {
|
|
@@ -15,14 +15,16 @@
|
|
|
15
15
|
|
|
16
16
|
namespace duckdb {
|
|
17
17
|
struct ParallelTableScanState;
|
|
18
|
-
|
|
18
|
+
struct ParallelCollectionScanState;
|
|
19
|
+
class CreateIndexScanState;
|
|
20
|
+
class CollectionScanState;
|
|
19
21
|
class PersistentTableData;
|
|
20
22
|
class TableDataWriter;
|
|
21
23
|
class TableIndexList;
|
|
22
24
|
class TableStatistics;
|
|
23
|
-
|
|
25
|
+
struct TableAppendState;
|
|
26
|
+
class DuckTransaction;
|
|
24
27
|
class BoundConstraint;
|
|
25
|
-
|
|
26
28
|
class RowGroupSegmentTree;
|
|
27
29
|
|
|
28
30
|
class RowGroupCollection {
|
|
@@ -48,8 +50,8 @@ public:
|
|
|
48
50
|
void InitializeCreateIndexScan(CreateIndexScanState &state);
|
|
49
51
|
void InitializeScanWithOffset(CollectionScanState &state, const vector<column_t> &column_ids, idx_t start_row,
|
|
50
52
|
idx_t end_row);
|
|
51
|
-
static bool InitializeScanInRowGroup(CollectionScanState &state,
|
|
52
|
-
idx_t vector_index, idx_t max_row);
|
|
53
|
+
static bool InitializeScanInRowGroup(CollectionScanState &state, RowGroupCollection &collection,
|
|
54
|
+
RowGroup &row_group, idx_t vector_index, idx_t max_row);
|
|
53
55
|
void InitializeParallelScan(ParallelCollectionScanState &state);
|
|
54
56
|
bool NextParallelScan(ClientContext &context, ParallelCollectionScanState &state, CollectionScanState &scan_state);
|
|
55
57
|
|
|
@@ -99,6 +101,15 @@ public:
|
|
|
99
101
|
unique_ptr<BaseStatistics> CopyStats(column_t column_id);
|
|
100
102
|
void SetDistinct(column_t column_id, unique_ptr<DistinctStatistics> distinct_stats);
|
|
101
103
|
|
|
104
|
+
AttachedDatabase &GetAttached();
|
|
105
|
+
DatabaseInstance &GetDatabase();
|
|
106
|
+
BlockManager &GetBlockManager() {
|
|
107
|
+
return block_manager;
|
|
108
|
+
}
|
|
109
|
+
DataTableInfo &GetTableInfo() {
|
|
110
|
+
return *info;
|
|
111
|
+
}
|
|
112
|
+
|
|
102
113
|
private:
|
|
103
114
|
bool IsEmpty(SegmentLock &) const;
|
|
104
115
|
|
|
@@ -107,7 +118,9 @@ private:
|
|
|
107
118
|
BlockManager &block_manager;
|
|
108
119
|
//! The number of rows in the table
|
|
109
120
|
atomic<idx_t> total_rows;
|
|
121
|
+
//! The data table info
|
|
110
122
|
shared_ptr<DataTableInfo> info;
|
|
123
|
+
//! The column types of the row group collection
|
|
111
124
|
vector<LogicalType> types;
|
|
112
125
|
idx_t row_start;
|
|
113
126
|
//! The segment trees holding the various row_groups of the table
|
|
@@ -18,7 +18,7 @@ class MetaBlockReader;
|
|
|
18
18
|
|
|
19
19
|
class RowGroupSegmentTree : public SegmentTree<RowGroup, true> {
|
|
20
20
|
public:
|
|
21
|
-
RowGroupSegmentTree(
|
|
21
|
+
RowGroupSegmentTree(RowGroupCollection &collection);
|
|
22
22
|
~RowGroupSegmentTree() override;
|
|
23
23
|
|
|
24
24
|
void Initialize(PersistentTableData &data);
|
|
@@ -26,9 +26,7 @@ public:
|
|
|
26
26
|
protected:
|
|
27
27
|
unique_ptr<RowGroup> LoadSegment() override;
|
|
28
28
|
|
|
29
|
-
|
|
30
|
-
BlockManager &block_manager;
|
|
31
|
-
vector<LogicalType> column_types;
|
|
29
|
+
RowGroupCollection &collection;
|
|
32
30
|
idx_t current_row_group;
|
|
33
31
|
idx_t max_row_group;
|
|
34
32
|
unique_ptr<MetaBlockReader> reader;
|
|
@@ -72,12 +72,11 @@ struct ColumnScanState {
|
|
|
72
72
|
idx_t last_offset = 0;
|
|
73
73
|
|
|
74
74
|
public:
|
|
75
|
+
void Initialize(const LogicalType &type);
|
|
75
76
|
//! Move the scan state forward by "count" rows (including all child states)
|
|
76
77
|
void Next(idx_t count);
|
|
77
78
|
//! Move ONLY this state forward by "count" rows (i.e. not the child states)
|
|
78
79
|
void NextInternal(idx_t count);
|
|
79
|
-
//! Move the scan state forward by STANDARD_VECTOR_SIZE rows
|
|
80
|
-
void NextVector();
|
|
81
80
|
};
|
|
82
81
|
|
|
83
82
|
struct ColumnFetchState {
|
|
@@ -89,39 +88,18 @@ struct ColumnFetchState {
|
|
|
89
88
|
BufferHandle &GetOrInsertHandle(ColumnSegment &segment);
|
|
90
89
|
};
|
|
91
90
|
|
|
92
|
-
class
|
|
91
|
+
class CollectionScanState {
|
|
93
92
|
public:
|
|
94
|
-
|
|
95
|
-
: row_group(nullptr), vector_index(0), max_row(0), parent(parent_p) {
|
|
96
|
-
}
|
|
93
|
+
CollectionScanState(TableScanState &parent_p);
|
|
97
94
|
|
|
98
95
|
//! The current row_group we are scanning
|
|
99
|
-
RowGroup *row_group
|
|
96
|
+
RowGroup *row_group;
|
|
100
97
|
//! The vector index within the row_group
|
|
101
|
-
idx_t vector_index
|
|
102
|
-
//! The maximum row
|
|
103
|
-
idx_t
|
|
98
|
+
idx_t vector_index;
|
|
99
|
+
//! The maximum row within the row group
|
|
100
|
+
idx_t max_row_group_row;
|
|
104
101
|
//! Child column scans
|
|
105
102
|
unique_ptr<ColumnScanState[]> column_scans;
|
|
106
|
-
|
|
107
|
-
public:
|
|
108
|
-
const vector<column_t> &GetColumnIds();
|
|
109
|
-
TableFilterSet *GetFilters();
|
|
110
|
-
AdaptiveFilter *GetAdaptiveFilter();
|
|
111
|
-
idx_t GetParentMaxRow();
|
|
112
|
-
|
|
113
|
-
private:
|
|
114
|
-
//! The parent scan state
|
|
115
|
-
CollectionScanState &parent;
|
|
116
|
-
};
|
|
117
|
-
|
|
118
|
-
class CollectionScanState {
|
|
119
|
-
public:
|
|
120
|
-
CollectionScanState(TableScanState &parent_p)
|
|
121
|
-
: row_group_state(*this), row_groups(nullptr), max_row(0), batch_index(0), parent(parent_p) {};
|
|
122
|
-
|
|
123
|
-
//! The row_group scan state
|
|
124
|
-
RowGroupScanState row_group_state;
|
|
125
103
|
//! Row group segment tree
|
|
126
104
|
RowGroupSegmentTree *row_groups;
|
|
127
105
|
//! The total maximum row index
|
|
@@ -130,6 +108,7 @@ public:
|
|
|
130
108
|
idx_t batch_index;
|
|
131
109
|
|
|
132
110
|
public:
|
|
111
|
+
void Initialize(const vector<LogicalType> &types);
|
|
133
112
|
const vector<column_t> &GetColumnIds();
|
|
134
113
|
TableFilterSet *GetFilters();
|
|
135
114
|
AdaptiveFilter *GetAdaptiveFilter();
|
|
@@ -167,12 +146,16 @@ private:
|
|
|
167
146
|
};
|
|
168
147
|
|
|
169
148
|
struct ParallelCollectionScanState {
|
|
149
|
+
ParallelCollectionScanState();
|
|
150
|
+
|
|
170
151
|
//! The row group collection we are scanning
|
|
171
152
|
RowGroupCollection *collection;
|
|
172
153
|
RowGroup *current_row_group;
|
|
173
154
|
idx_t vector_index;
|
|
174
155
|
idx_t max_row;
|
|
175
156
|
idx_t batch_index;
|
|
157
|
+
atomic<idx_t> processed_rows;
|
|
158
|
+
mutex lock;
|
|
176
159
|
};
|
|
177
160
|
|
|
178
161
|
struct ParallelTableScanState {
|