duckdb 1.4.3-dev0.0 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +14 -5
- package/src/duckdb/extension/parquet/column_writer.cpp +4 -4
- package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +12 -4
- package/src/duckdb/src/common/encryption_key_manager.cpp +4 -0
- package/src/duckdb/src/common/local_file_system.cpp +23 -0
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +6 -0
- package/src/duckdb/src/common/types/conflict_manager.cpp +1 -1
- package/src/duckdb/src/execution/index/art/base_node.cpp +3 -1
- package/src/duckdb/src/execution/index/art/prefix.cpp +5 -8
- package/src/duckdb/src/execution/index/bound_index.cpp +68 -25
- package/src/duckdb/src/execution/index/unbound_index.cpp +21 -10
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +4 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +36 -28
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +3 -2
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +12 -6
- package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +8 -4
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +4 -3
- package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +3 -2
- package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +0 -1
- package/src/duckdb/src/execution/physical_plan/plan_window.cpp +6 -8
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +4 -3
- package/src/duckdb/src/function/macro_function.cpp +20 -2
- package/src/duckdb/src/function/table/system/duckdb_log.cpp +3 -0
- package/src/duckdb/src/function/table/system/test_all_types.cpp +26 -13
- package/src/duckdb/src/function/table/table_scan.cpp +72 -38
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/table_function.cpp +24 -0
- package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/limits.hpp +4 -2
- package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +41 -7
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +15 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -1
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +4 -4
- package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +3 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +2 -6
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +4 -1
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
- package/src/duckdb/src/logging/log_storage.cpp +17 -23
- package/src/duckdb/src/main/capi/duckdb-c.cpp +1 -1
- package/src/duckdb/src/main/connection.cpp +0 -5
- package/src/duckdb/src/main/database_manager.cpp +12 -9
- package/src/duckdb/src/main/db_instance_cache.cpp +15 -1
- package/src/duckdb/src/main/extension/extension_alias.cpp +1 -0
- package/src/duckdb/src/optimizer/filter_combiner.cpp +38 -4
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -15
- package/src/duckdb/src/optimizer/late_materialization.cpp +5 -0
- package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +6 -3
- package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +3 -2
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +4 -1
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +17 -10
- package/src/duckdb/src/planner/binder.cpp +3 -3
- package/src/duckdb/src/planner/bound_result_modifier.cpp +22 -5
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +4 -1
- package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder.cpp +1 -2
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +57 -24
- package/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp +5 -3
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +9 -0
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/table/chunk_info.cpp +3 -3
- package/src/duckdb/src/storage/table/column_data.cpp +5 -1
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +1 -1
- package/src/duckdb/src/storage/table/column_segment.cpp +3 -1
- package/src/duckdb/src/storage/table/row_group.cpp +6 -8
- package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -1
- package/src/duckdb/src/storage/table/row_version_manager.cpp +37 -23
- package/src/duckdb/src/storage/table/standard_column_data.cpp +5 -5
- package/src/duckdb/src/storage/table/validity_column_data.cpp +17 -0
|
@@ -18,11 +18,43 @@ class ColumnDataCollection;
|
|
|
18
18
|
|
|
19
19
|
enum class BufferedIndexReplay : uint8_t { INSERT_ENTRY = 0, DEL_ENTRY = 1 };
|
|
20
20
|
|
|
21
|
-
struct
|
|
21
|
+
struct ReplayRange {
|
|
22
22
|
BufferedIndexReplay type;
|
|
23
|
-
|
|
23
|
+
// [start, end) - start is inclusive, end is exclusive for the range within the ColumnDataCollection
|
|
24
|
+
// buffer for operations to replay for this range.
|
|
25
|
+
idx_t start;
|
|
26
|
+
idx_t end;
|
|
27
|
+
explicit ReplayRange(const BufferedIndexReplay replay_type, const idx_t start_p, const idx_t end_p)
|
|
28
|
+
: type(replay_type), start(start_p), end(end_p) {
|
|
29
|
+
}
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
// All inserts and deletes to be replayed are stored in their respective buffers.
|
|
33
|
+
// Since the inserts and deletes may be interleaved, however, ranges stores the ordering of operations
|
|
34
|
+
// and their offsets in the respective buffer.
|
|
35
|
+
// Simple example:
|
|
36
|
+
// ranges[0] - INSERT_ENTRY, [0,6)
|
|
37
|
+
// ranges[1] - DEL_ENTRY, [0,3)
|
|
38
|
+
// ranges[2] - INSERT_ENTRY [6,12)
|
|
39
|
+
// So even though the buffered_inserts has all the insert data from [0,12), ranges gives us the intervals for
|
|
40
|
+
// replaying the index operations in the right order.
|
|
41
|
+
struct BufferedIndexReplays {
|
|
42
|
+
vector<ReplayRange> ranges;
|
|
43
|
+
unique_ptr<ColumnDataCollection> buffered_inserts;
|
|
44
|
+
unique_ptr<ColumnDataCollection> buffered_deletes;
|
|
45
|
+
|
|
46
|
+
BufferedIndexReplays() = default;
|
|
47
|
+
|
|
48
|
+
unique_ptr<ColumnDataCollection> &GetBuffer(const BufferedIndexReplay replay_type) {
|
|
49
|
+
if (replay_type == BufferedIndexReplay::INSERT_ENTRY) {
|
|
50
|
+
return buffered_inserts;
|
|
51
|
+
}
|
|
52
|
+
return buffered_deletes;
|
|
53
|
+
}
|
|
24
54
|
|
|
25
|
-
|
|
55
|
+
bool HasBufferedReplays() const {
|
|
56
|
+
return !ranges.empty();
|
|
57
|
+
}
|
|
26
58
|
};
|
|
27
59
|
|
|
28
60
|
class UnboundIndex final : public Index {
|
|
@@ -31,8 +63,9 @@ private:
|
|
|
31
63
|
unique_ptr<CreateInfo> create_info;
|
|
32
64
|
//! The serialized storage information of the index.
|
|
33
65
|
IndexStorageInfo storage_info;
|
|
34
|
-
|
|
35
|
-
|
|
66
|
+
|
|
67
|
+
//! Buffered for index operations during WAL replay. They are replayed upon index binding.
|
|
68
|
+
BufferedIndexReplays buffered_replays;
|
|
36
69
|
|
|
37
70
|
//! Maps the column IDs in the buffered replays to a physical table offset.
|
|
38
71
|
//! For example, column [i] in a buffered ColumnDataCollection is the data for an Indexed column with
|
|
@@ -78,12 +111,13 @@ public:
|
|
|
78
111
|
void BufferChunk(DataChunk &index_column_chunk, Vector &row_ids, const vector<StorageIndex> &mapped_column_ids_p,
|
|
79
112
|
BufferedIndexReplay replay_type);
|
|
80
113
|
bool HasBufferedReplays() const {
|
|
81
|
-
return
|
|
114
|
+
return buffered_replays.HasBufferedReplays();
|
|
82
115
|
}
|
|
83
116
|
|
|
84
|
-
|
|
117
|
+
BufferedIndexReplays &GetBufferedReplays() {
|
|
85
118
|
return buffered_replays;
|
|
86
119
|
}
|
|
120
|
+
|
|
87
121
|
const vector<StorageIndex> &GetMappedColumnIds() const {
|
|
88
122
|
return mapped_column_ids;
|
|
89
123
|
}
|
|
@@ -121,6 +121,8 @@ public:
|
|
|
121
121
|
|
|
122
122
|
virtual ~BaseScanner() = default;
|
|
123
123
|
|
|
124
|
+
void Print() const;
|
|
125
|
+
|
|
124
126
|
//! Returns true if the scanner is finished
|
|
125
127
|
bool FinishedFile() const;
|
|
126
128
|
|
|
@@ -164,10 +166,15 @@ public:
|
|
|
164
166
|
//! States
|
|
165
167
|
CSVStates states;
|
|
166
168
|
|
|
169
|
+
//! If the scanner ever entered a quoted state
|
|
167
170
|
bool ever_quoted = false;
|
|
168
171
|
|
|
172
|
+
//! If the scanner ever entered an escaped state.
|
|
169
173
|
bool ever_escaped = false;
|
|
170
174
|
|
|
175
|
+
//! If the scanner ever used advantage of the non-strict mode.
|
|
176
|
+
bool used_unstrictness = false;
|
|
177
|
+
|
|
171
178
|
//! Shared pointer to the buffer_manager, this is shared across multiple scanners
|
|
172
179
|
shared_ptr<CSVBufferManager> buffer_manager;
|
|
173
180
|
|
|
@@ -302,6 +309,9 @@ protected:
|
|
|
302
309
|
!state_machine->dialect_options.state_machine_options.strict_mode.GetValue())) {
|
|
303
310
|
// We only set the ever escaped variable if this is either a quote char OR strict mode is off
|
|
304
311
|
ever_escaped = true;
|
|
312
|
+
if (states.states[0] == CSVState::UNQUOTED_ESCAPE) {
|
|
313
|
+
used_unstrictness = true;
|
|
314
|
+
}
|
|
305
315
|
}
|
|
306
316
|
ever_quoted = true;
|
|
307
317
|
T::SetQuoted(result, iterator.pos.buffer_pos);
|
|
@@ -332,11 +342,15 @@ protected:
|
|
|
332
342
|
break;
|
|
333
343
|
}
|
|
334
344
|
case CSVState::ESCAPE:
|
|
335
|
-
case CSVState::UNQUOTED_ESCAPE:
|
|
336
345
|
case CSVState::ESCAPED_RETURN:
|
|
337
346
|
T::SetEscaped(result);
|
|
338
347
|
iterator.pos.buffer_pos++;
|
|
339
348
|
break;
|
|
349
|
+
case CSVState::UNQUOTED_ESCAPE:
|
|
350
|
+
T::SetEscaped(result);
|
|
351
|
+
iterator.pos.buffer_pos++;
|
|
352
|
+
used_unstrictness = true;
|
|
353
|
+
break;
|
|
340
354
|
case CSVState::STANDARD: {
|
|
341
355
|
iterator.pos.buffer_pos++;
|
|
342
356
|
while (iterator.pos.buffer_pos + 8 < to_pos) {
|
package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp
CHANGED
|
@@ -116,6 +116,7 @@ private:
|
|
|
116
116
|
//! Highest number of columns found
|
|
117
117
|
idx_t max_columns_found = 0;
|
|
118
118
|
idx_t max_columns_found_error = 0;
|
|
119
|
+
bool best_candidate_is_strict = false;
|
|
119
120
|
//! Current Candidates being considered
|
|
120
121
|
vector<unique_ptr<ColumnCountScanner>> candidates;
|
|
121
122
|
//! Reference to original CSV Options, it will be modified as a result of the sniffer.
|
package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp
CHANGED
|
@@ -176,7 +176,7 @@ public:
|
|
|
176
176
|
const shared_ptr<CSVBufferHandle> &buffer_handle, Allocator &buffer_allocator,
|
|
177
177
|
idx_t result_size_p, idx_t buffer_position, CSVErrorHandler &error_handler, CSVIterator &iterator,
|
|
178
178
|
bool store_line_size, shared_ptr<CSVFileScan> csv_file_scan, idx_t &lines_read, bool sniffing,
|
|
179
|
-
string path, idx_t scan_id);
|
|
179
|
+
const string &path, idx_t scan_id, bool &used_unstrictness);
|
|
180
180
|
|
|
181
181
|
~StringValueResult();
|
|
182
182
|
|
|
@@ -225,6 +225,7 @@ public:
|
|
|
225
225
|
|
|
226
226
|
shared_ptr<CSVFileScan> csv_file_scan;
|
|
227
227
|
idx_t &lines_read;
|
|
228
|
+
bool &used_unstrictness;
|
|
228
229
|
//! Information regarding projected columns
|
|
229
230
|
unsafe_unique_array<bool> projected_columns;
|
|
230
231
|
bool projecting_columns = false;
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
#include "duckdb/common/common.hpp"
|
|
12
12
|
#include "duckdb/execution/physical_operator.hpp"
|
|
13
|
+
#include "duckdb/parser/group_by_node.hpp"
|
|
13
14
|
#include "duckdb/planner/logical_operator.hpp"
|
|
14
15
|
#include "duckdb/planner/logical_tokens.hpp"
|
|
15
16
|
#include "duckdb/planner/joinside.hpp"
|
|
@@ -152,7 +153,8 @@ protected:
|
|
|
152
153
|
PhysicalOperator &PlanComparisonJoin(LogicalComparisonJoin &op);
|
|
153
154
|
PhysicalOperator &PlanDelimJoin(LogicalComparisonJoin &op);
|
|
154
155
|
PhysicalOperator &ExtractAggregateExpressions(PhysicalOperator &child, vector<unique_ptr<Expression>> &expressions,
|
|
155
|
-
vector<unique_ptr<Expression>> &groups
|
|
156
|
+
vector<unique_ptr<Expression>> &groups,
|
|
157
|
+
optional_ptr<vector<GroupingSet>> grouping_sets);
|
|
156
158
|
|
|
157
159
|
private:
|
|
158
160
|
ClientContext &context;
|
|
@@ -70,7 +70,8 @@ public:
|
|
|
70
70
|
AggregateType aggr_type = AggregateType::NON_DISTINCT);
|
|
71
71
|
|
|
72
72
|
DUCKDB_API static void BindSortedAggregate(ClientContext &context, BoundAggregateExpression &expr,
|
|
73
|
-
const vector<unique_ptr<Expression>> &groups
|
|
73
|
+
const vector<unique_ptr<Expression>> &groups,
|
|
74
|
+
optional_ptr<vector<GroupingSet>> grouping_sets);
|
|
74
75
|
DUCKDB_API static void BindSortedAggregate(ClientContext &context, BoundWindowExpression &expr);
|
|
75
76
|
|
|
76
77
|
//! Cast a set of expressions to the arguments of this function
|
|
@@ -432,6 +432,8 @@ public:
|
|
|
432
432
|
TableFunctionInitialization global_initialization = TableFunctionInitialization::INITIALIZE_ON_EXECUTE;
|
|
433
433
|
|
|
434
434
|
DUCKDB_API bool Equal(const TableFunction &rhs) const;
|
|
435
|
+
DUCKDB_API bool operator==(const TableFunction &rhs) const;
|
|
436
|
+
DUCKDB_API bool operator!=(const TableFunction &rhs) const;
|
|
435
437
|
};
|
|
436
438
|
|
|
437
439
|
} // namespace duckdb
|
|
@@ -26,6 +26,8 @@ struct DatabaseCacheEntry {
|
|
|
26
26
|
mutex update_database_mutex;
|
|
27
27
|
};
|
|
28
28
|
|
|
29
|
+
enum class CacheBehavior { AUTOMATIC, ALWAYS_CACHE, NEVER_CACHE };
|
|
30
|
+
|
|
29
31
|
class DBInstanceCache {
|
|
30
32
|
public:
|
|
31
33
|
DBInstanceCache();
|
|
@@ -41,6 +43,9 @@ public:
|
|
|
41
43
|
//! Either returns an existing entry, or creates and caches a new DB Instance
|
|
42
44
|
shared_ptr<DuckDB> GetOrCreateInstance(const string &database, DBConfig &config_dict, bool cache_instance,
|
|
43
45
|
const std::function<void(DuckDB &)> &on_create = nullptr);
|
|
46
|
+
shared_ptr<DuckDB> GetOrCreateInstance(const string &database, DBConfig &config_dict,
|
|
47
|
+
CacheBehavior cache_behavior = CacheBehavior::AUTOMATIC,
|
|
48
|
+
const std::function<void(DuckDB &)> &on_create = nullptr);
|
|
44
49
|
|
|
45
50
|
private:
|
|
46
51
|
shared_ptr<DatabaseFilePathManager> path_manager;
|
|
@@ -1045,6 +1045,8 @@ static constexpr ExtensionEntry EXTENSION_SETTINGS[] = {
|
|
|
1045
1045
|
{"http_retry_wait_ms", "httpfs"},
|
|
1046
1046
|
{"http_timeout", "httpfs"},
|
|
1047
1047
|
{"httpfs_client_implementation", "httpfs"},
|
|
1048
|
+
{"iceberg_via_aws_sdk_for_catalog_interactions", "iceberg"},
|
|
1049
|
+
{"merge_http_secret_into_s3_request", "httpfs"},
|
|
1048
1050
|
{"mysql_bit1_as_boolean", "mysql_scanner"},
|
|
1049
1051
|
{"mysql_debug_show_queries", "mysql_scanner"},
|
|
1050
1052
|
{"mysql_experimental_filter_pushdown", "mysql_scanner"},
|
|
@@ -50,6 +50,7 @@ public:
|
|
|
50
50
|
//! If this returns true - this sorts "in_list" as a side-effect
|
|
51
51
|
static bool IsDenseRange(vector<Value> &in_list);
|
|
52
52
|
static bool ContainsNull(vector<Value> &in_list);
|
|
53
|
+
static bool FindNextLegalUTF8(string &prefix_string);
|
|
53
54
|
|
|
54
55
|
void GenerateFilters(const std::function<void(unique_ptr<Expression> filter)> &callback);
|
|
55
56
|
bool HasFilters();
|
|
@@ -57,10 +57,10 @@ public:
|
|
|
57
57
|
bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
|
|
58
58
|
void AddRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
|
|
59
59
|
//! Add an unnest relation which can come from a logical unnest or a logical get which has an unnest function
|
|
60
|
-
void
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
60
|
+
void AddRelationWithChildren(JoinOrderOptimizer &optimizer, LogicalOperator &op, LogicalOperator &input_op,
|
|
61
|
+
optional_ptr<LogicalOperator> parent, RelationStats &child_stats,
|
|
62
|
+
optional_ptr<LogicalOperator> limit_op,
|
|
63
|
+
vector<reference<LogicalOperator>> &datasource_filters);
|
|
64
64
|
void AddAggregateOrWindowRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent,
|
|
65
65
|
const RelationStats &stats, LogicalOperatorType op_type);
|
|
66
66
|
vector<unique_ptr<SingleJoinRelation>> GetRelations();
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
#include "duckdb/optimizer/rule.hpp"
|
|
12
12
|
#include "duckdb/parser/expression_map.hpp"
|
|
13
|
+
#include "duckdb/parser/group_by_node.hpp"
|
|
13
14
|
|
|
14
15
|
namespace duckdb {
|
|
15
16
|
|
|
@@ -18,7 +19,8 @@ public:
|
|
|
18
19
|
explicit OrderedAggregateOptimizer(ExpressionRewriter &rewriter);
|
|
19
20
|
|
|
20
21
|
static unique_ptr<Expression> Apply(ClientContext &context, BoundAggregateExpression &aggr,
|
|
21
|
-
vector<unique_ptr<Expression>> &groups,
|
|
22
|
+
vector<unique_ptr<Expression>> &groups,
|
|
23
|
+
optional_ptr<vector<GroupingSet>> grouping_sets, bool &changes_made);
|
|
22
24
|
unique_ptr<Expression> Apply(LogicalOperator &op, vector<reference<Expression>> &bindings, bool &changes_made,
|
|
23
25
|
bool is_root) override;
|
|
24
26
|
};
|
|
@@ -23,6 +23,9 @@ enum class SampleMethod : uint8_t { SYSTEM_SAMPLE = 0, BERNOULLI_SAMPLE = 1, RES
|
|
|
23
23
|
string SampleMethodToString(SampleMethod method);
|
|
24
24
|
|
|
25
25
|
class SampleOptions {
|
|
26
|
+
public:
|
|
27
|
+
// 1 billion rows should be enough.
|
|
28
|
+
static constexpr idx_t MAX_SAMPLE_ROWS = 1000000000;
|
|
26
29
|
|
|
27
30
|
public:
|
|
28
31
|
explicit SampleOptions(int64_t seed_ = -1);
|
|
@@ -405,7 +405,7 @@ private:
|
|
|
405
405
|
|
|
406
406
|
unique_ptr<QueryNode> BindTableMacro(FunctionExpression &function, TableMacroCatalogEntry &macro_func, idx_t depth);
|
|
407
407
|
|
|
408
|
-
unique_ptr<BoundCTENode> BindMaterializedCTE(CommonTableExpressionMap &cte_map);
|
|
408
|
+
unique_ptr<BoundCTENode> BindMaterializedCTE(CommonTableExpressionMap &cte_map, unique_ptr<CTENode> &cte_root);
|
|
409
409
|
unique_ptr<BoundCTENode> BindCTE(CTENode &statement);
|
|
410
410
|
|
|
411
411
|
unique_ptr<BoundQueryNode> BindNode(SelectNode &node);
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
#pragma once
|
|
10
10
|
|
|
11
11
|
#include "duckdb/common/limits.hpp"
|
|
12
|
+
#include "duckdb/parser/group_by_node.hpp"
|
|
12
13
|
#include "duckdb/parser/result_modifier.hpp"
|
|
13
14
|
#include "duckdb/planner/bound_statement.hpp"
|
|
14
15
|
#include "duckdb/planner/expression.hpp"
|
|
@@ -155,8 +156,9 @@ public:
|
|
|
155
156
|
|
|
156
157
|
//! Remove unneeded/duplicate order elements.
|
|
157
158
|
//! Returns true of orders is not empty.
|
|
158
|
-
static bool Simplify(vector<BoundOrderByNode> &orders, const vector<unique_ptr<Expression>> &groups
|
|
159
|
-
|
|
159
|
+
static bool Simplify(vector<BoundOrderByNode> &orders, const vector<unique_ptr<Expression>> &groups,
|
|
160
|
+
optional_ptr<vector<GroupingSet>> grouping_sets);
|
|
161
|
+
bool Simplify(const vector<unique_ptr<Expression>> &groups, optional_ptr<vector<GroupingSet>> grouping_sets);
|
|
160
162
|
};
|
|
161
163
|
|
|
162
164
|
enum class DistinctType : uint8_t { DISTINCT = 0, DISTINCT_ON = 1 };
|
|
@@ -10,9 +10,8 @@
|
|
|
10
10
|
|
|
11
11
|
#include "duckdb/common/exception.hpp"
|
|
12
12
|
#include "duckdb/common/stack_checker.hpp"
|
|
13
|
-
#include "duckdb/common/exception/binder_exception.hpp"
|
|
14
13
|
#include "duckdb/common/error_data.hpp"
|
|
15
|
-
#include "duckdb/common/
|
|
14
|
+
#include "duckdb/common/exception/binder_exception.hpp"
|
|
16
15
|
#include "duckdb/parser/expression/bound_expression.hpp"
|
|
17
16
|
#include "duckdb/parser/expression/lambdaref_expression.hpp"
|
|
18
17
|
#include "duckdb/parser/parsed_expression.hpp"
|
|
@@ -33,7 +33,7 @@ private:
|
|
|
33
33
|
bool parent_is_dependent_join = false);
|
|
34
34
|
|
|
35
35
|
//! Mark entire subtree of Logical Operators as correlated by adding them to the has_correlated_expressions map.
|
|
36
|
-
bool MarkSubtreeCorrelated(LogicalOperator &op);
|
|
36
|
+
bool MarkSubtreeCorrelated(LogicalOperator &op, idx_t cte_index);
|
|
37
37
|
|
|
38
38
|
//! Push the dependent join down a LogicalOperator
|
|
39
39
|
unique_ptr<LogicalOperator> PushDownDependentJoin(unique_ptr<LogicalOperator> plan,
|
|
@@ -17,13 +17,15 @@ namespace duckdb {
|
|
|
17
17
|
//! Helper class to rewrite correlated cte scans within a single LogicalOperator
|
|
18
18
|
class RewriteCTEScan : public LogicalOperatorVisitor {
|
|
19
19
|
public:
|
|
20
|
-
RewriteCTEScan(idx_t table_index, const CorrelatedColumns &correlated_columns
|
|
20
|
+
RewriteCTEScan(idx_t table_index, const CorrelatedColumns &correlated_columns,
|
|
21
|
+
bool rewrite_dependent_joins = false);
|
|
21
22
|
|
|
22
23
|
void VisitOperator(LogicalOperator &op) override;
|
|
23
24
|
|
|
24
25
|
private:
|
|
25
26
|
idx_t table_index;
|
|
26
27
|
const CorrelatedColumns &correlated_columns;
|
|
28
|
+
bool rewrite_dependent_joins = false;
|
|
27
29
|
};
|
|
28
30
|
|
|
29
31
|
} // namespace duckdb
|
|
@@ -45,7 +45,7 @@ public:
|
|
|
45
45
|
virtual bool Fetch(TransactionData transaction, row_t row) = 0;
|
|
46
46
|
virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0;
|
|
47
47
|
virtual idx_t GetCommittedDeletedCount(idx_t max_count) = 0;
|
|
48
|
-
virtual bool Cleanup(transaction_t lowest_transaction
|
|
48
|
+
virtual bool Cleanup(transaction_t lowest_transaction) const;
|
|
49
49
|
|
|
50
50
|
virtual bool HasDeletes() const = 0;
|
|
51
51
|
|
|
@@ -87,7 +87,7 @@ public:
|
|
|
87
87
|
bool Fetch(TransactionData transaction, row_t row) override;
|
|
88
88
|
void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override;
|
|
89
89
|
idx_t GetCommittedDeletedCount(idx_t max_count) override;
|
|
90
|
-
bool Cleanup(transaction_t lowest_transaction
|
|
90
|
+
bool Cleanup(transaction_t lowest_transaction) const override;
|
|
91
91
|
|
|
92
92
|
bool HasDeletes() const override;
|
|
93
93
|
|
|
@@ -124,7 +124,7 @@ public:
|
|
|
124
124
|
SelectionVector &sel_vector, idx_t max_count) override;
|
|
125
125
|
bool Fetch(TransactionData transaction, row_t row) override;
|
|
126
126
|
void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override;
|
|
127
|
-
bool Cleanup(transaction_t lowest_transaction
|
|
127
|
+
bool Cleanup(transaction_t lowest_transaction) const override;
|
|
128
128
|
idx_t GetCommittedDeletedCount(idx_t max_count) override;
|
|
129
129
|
|
|
130
130
|
void Append(idx_t start, idx_t end, transaction_t commit_id);
|
|
@@ -99,10 +99,6 @@ public:
|
|
|
99
99
|
|
|
100
100
|
const vector<MetaBlockPointer> &GetColumnStartPointers() const;
|
|
101
101
|
|
|
102
|
-
//! Returns the list of meta block pointers used by the deletes
|
|
103
|
-
const vector<MetaBlockPointer> &GetDeletesPointers() const {
|
|
104
|
-
return deletes_pointers;
|
|
105
|
-
}
|
|
106
102
|
BlockManager &GetBlockManager();
|
|
107
103
|
DataTableInfo &GetTableInfo();
|
|
108
104
|
|
|
@@ -198,6 +194,8 @@ public:
|
|
|
198
194
|
|
|
199
195
|
static FilterPropagateResult CheckRowIdFilter(const TableFilter &filter, idx_t beg_row, idx_t end_row);
|
|
200
196
|
|
|
197
|
+
vector<MetaBlockPointer> CheckpointDeletes(MetadataManager &manager);
|
|
198
|
+
|
|
201
199
|
private:
|
|
202
200
|
optional_ptr<RowVersionManager> GetVersionInfo();
|
|
203
201
|
shared_ptr<RowVersionManager> GetOrCreateVersionInfoPtr();
|
|
@@ -214,8 +212,6 @@ private:
|
|
|
214
212
|
template <TableScanType TYPE>
|
|
215
213
|
void TemplatedScan(TransactionData transaction, CollectionScanState &state, DataChunk &result);
|
|
216
214
|
|
|
217
|
-
vector<MetaBlockPointer> CheckpointDeletes(MetadataManager &manager);
|
|
218
|
-
|
|
219
215
|
bool HasUnloadedDeletes() const;
|
|
220
216
|
|
|
221
217
|
private:
|
|
@@ -46,11 +46,14 @@ public:
|
|
|
46
46
|
static shared_ptr<RowVersionManager> Deserialize(MetaBlockPointer delete_pointer, MetadataManager &manager,
|
|
47
47
|
idx_t start);
|
|
48
48
|
|
|
49
|
+
bool HasUnserializedChanges();
|
|
50
|
+
vector<MetaBlockPointer> GetStoragePointers();
|
|
51
|
+
|
|
49
52
|
private:
|
|
50
53
|
mutex version_lock;
|
|
51
54
|
idx_t start;
|
|
52
55
|
vector<unique_ptr<ChunkInfo>> vector_info;
|
|
53
|
-
bool
|
|
56
|
+
bool has_unserialized_changes;
|
|
54
57
|
vector<MetaBlockPointer> storage_pointers;
|
|
55
58
|
|
|
56
59
|
private:
|
|
@@ -23,6 +23,8 @@ public:
|
|
|
23
23
|
public:
|
|
24
24
|
FilterPropagateResult CheckZonemap(ColumnScanState &state, TableFilter &filter) override;
|
|
25
25
|
void AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata, idx_t count) override;
|
|
26
|
+
void UpdateWithBase(TransactionData transaction, DataTable &data_table, idx_t column_index, Vector &update_vector,
|
|
27
|
+
row_t *row_ids, idx_t update_count, ColumnData &base);
|
|
26
28
|
};
|
|
27
29
|
|
|
28
30
|
} // namespace duckdb
|
|
@@ -22,26 +22,19 @@ namespace duckdb {
|
|
|
22
22
|
|
|
23
23
|
vector<LogicalType> LogStorage::GetSchema(LoggingTargetTable table) {
|
|
24
24
|
switch (table) {
|
|
25
|
-
case LoggingTargetTable::ALL_LOGS:
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
LogicalType::UBIGINT, // query_id
|
|
32
|
-
LogicalType::UBIGINT, // thread
|
|
33
|
-
LogicalType::TIMESTAMP, // timestamp
|
|
34
|
-
LogicalType::VARCHAR, // log_type
|
|
35
|
-
LogicalType::VARCHAR, // level
|
|
36
|
-
LogicalType::VARCHAR, // message
|
|
37
|
-
};
|
|
25
|
+
case LoggingTargetTable::ALL_LOGS: {
|
|
26
|
+
auto all_logs = GetSchema(LoggingTargetTable::LOG_CONTEXTS);
|
|
27
|
+
auto log_entries = GetSchema(LoggingTargetTable::LOG_ENTRIES);
|
|
28
|
+
all_logs.insert(all_logs.end(), log_entries.begin() + 1, log_entries.end());
|
|
29
|
+
return all_logs;
|
|
30
|
+
}
|
|
38
31
|
case LoggingTargetTable::LOG_ENTRIES:
|
|
39
32
|
return {
|
|
40
|
-
LogicalType::UBIGINT,
|
|
41
|
-
LogicalType::
|
|
42
|
-
LogicalType::VARCHAR,
|
|
43
|
-
LogicalType::VARCHAR,
|
|
44
|
-
LogicalType::VARCHAR,
|
|
33
|
+
LogicalType::UBIGINT, // context_id
|
|
34
|
+
LogicalType::TIMESTAMP_TZ, // timestamp
|
|
35
|
+
LogicalType::VARCHAR, // log_type
|
|
36
|
+
LogicalType::VARCHAR, // level
|
|
37
|
+
LogicalType::VARCHAR, // message
|
|
45
38
|
};
|
|
46
39
|
case LoggingTargetTable::LOG_CONTEXTS:
|
|
47
40
|
return {
|
|
@@ -59,11 +52,12 @@ vector<LogicalType> LogStorage::GetSchema(LoggingTargetTable table) {
|
|
|
59
52
|
|
|
60
53
|
vector<string> LogStorage::GetColumnNames(LoggingTargetTable table) {
|
|
61
54
|
switch (table) {
|
|
62
|
-
case LoggingTargetTable::ALL_LOGS:
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
55
|
+
case LoggingTargetTable::ALL_LOGS: {
|
|
56
|
+
auto all_logs = GetColumnNames(LoggingTargetTable::LOG_CONTEXTS);
|
|
57
|
+
auto log_entries = GetColumnNames(LoggingTargetTable::LOG_ENTRIES);
|
|
58
|
+
all_logs.insert(all_logs.end(), log_entries.begin() + 1, log_entries.end());
|
|
59
|
+
return all_logs;
|
|
60
|
+
}
|
|
67
61
|
case LoggingTargetTable::LOG_ENTRIES:
|
|
68
62
|
return {"context_id", "timestamp", "type", "log_level", "message"};
|
|
69
63
|
case LoggingTargetTable::LOG_CONTEXTS:
|
|
@@ -41,7 +41,7 @@ duckdb_state duckdb_open_internal(DBInstanceCacheWrapper *cache, const char *pat
|
|
|
41
41
|
if (path) {
|
|
42
42
|
path_str = path;
|
|
43
43
|
}
|
|
44
|
-
wrapper->database = cache->instance_cache->GetOrCreateInstance(path_str, *db_config
|
|
44
|
+
wrapper->database = cache->instance_cache->GetOrCreateInstance(path_str, *db_config);
|
|
45
45
|
} else {
|
|
46
46
|
wrapper->database = duckdb::make_shared_ptr<DuckDB>(path, db_config);
|
|
47
47
|
}
|
|
@@ -23,11 +23,6 @@ Connection::Connection(DatabaseInstance &database)
|
|
|
23
23
|
auto &connection_manager = ConnectionManager::Get(database);
|
|
24
24
|
connection_manager.AddConnection(*context);
|
|
25
25
|
connection_manager.AssignConnectionId(*this);
|
|
26
|
-
|
|
27
|
-
#ifdef DEBUG
|
|
28
|
-
EnableProfiling();
|
|
29
|
-
context->config.emit_profiler_output = false;
|
|
30
|
-
#endif
|
|
31
26
|
}
|
|
32
27
|
|
|
33
28
|
Connection::Connection(DuckDB &database) : Connection(*database.instance) {
|
|
@@ -83,7 +83,16 @@ shared_ptr<AttachedDatabase> DatabaseManager::GetDatabaseInternal(const lock_gua
|
|
|
83
83
|
|
|
84
84
|
shared_ptr<AttachedDatabase> DatabaseManager::AttachDatabase(ClientContext &context, AttachInfo &info,
|
|
85
85
|
AttachOptions &options) {
|
|
86
|
-
|
|
86
|
+
string extension = "";
|
|
87
|
+
if (FileSystem::IsRemoteFile(info.path, extension)) {
|
|
88
|
+
if (options.access_mode == AccessMode::AUTOMATIC) {
|
|
89
|
+
// Attaching of remote files gets bumped to READ_ONLY
|
|
90
|
+
// This is due to the fact that on most (all?) remote files writes to DB are not available
|
|
91
|
+
// and having this raised later is not super helpful
|
|
92
|
+
options.access_mode = AccessMode::READ_ONLY;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
87
96
|
if (options.db_type.empty() || StringUtil::CIEquals(options.db_type, "duckdb")) {
|
|
88
97
|
while (InsertDatabasePath(info, options) == InsertDatabasePathResult::ALREADY_EXISTS) {
|
|
89
98
|
// database with this name and path already exists
|
|
@@ -99,6 +108,7 @@ shared_ptr<AttachedDatabase> DatabaseManager::AttachDatabase(ClientContext &cont
|
|
|
99
108
|
}
|
|
100
109
|
}
|
|
101
110
|
}
|
|
111
|
+
auto &config = DBConfig::GetConfig(context);
|
|
102
112
|
GetDatabaseType(context, info, config, options);
|
|
103
113
|
if (!options.db_type.empty()) {
|
|
104
114
|
// we only need to prevent duplicate opening of DuckDB files
|
|
@@ -108,18 +118,11 @@ shared_ptr<AttachedDatabase> DatabaseManager::AttachDatabase(ClientContext &cont
|
|
|
108
118
|
if (AttachedDatabase::NameIsReserved(info.name)) {
|
|
109
119
|
throw BinderException("Attached database name \"%s\" cannot be used because it is a reserved name", info.name);
|
|
110
120
|
}
|
|
111
|
-
|
|
112
|
-
if (FileSystem::IsRemoteFile(info.path, extension)) {
|
|
121
|
+
if (!extension.empty()) {
|
|
113
122
|
if (!ExtensionHelper::TryAutoLoadExtension(context, extension)) {
|
|
114
123
|
throw MissingExtensionException("Attaching path '%s' requires extension '%s' to be loaded", info.path,
|
|
115
124
|
extension);
|
|
116
125
|
}
|
|
117
|
-
if (options.access_mode == AccessMode::AUTOMATIC) {
|
|
118
|
-
// Attaching of remote files gets bumped to READ_ONLY
|
|
119
|
-
// This is due to the fact that on most (all?) remote files writes to DB are not available
|
|
120
|
-
// and having this raised later is not super helpful
|
|
121
|
-
options.access_mode = AccessMode::READ_ONLY;
|
|
122
|
-
}
|
|
123
126
|
}
|
|
124
127
|
|
|
125
128
|
// now create the attached database
|
|
@@ -137,9 +137,23 @@ shared_ptr<DuckDB> DBInstanceCache::CreateInstance(const string &database, DBCon
|
|
|
137
137
|
shared_ptr<DuckDB> DBInstanceCache::GetOrCreateInstance(const string &database, DBConfig &config_dict,
|
|
138
138
|
bool cache_instance,
|
|
139
139
|
const std::function<void(DuckDB &)> &on_create) {
|
|
140
|
+
auto cache_behavior = cache_instance ? CacheBehavior::ALWAYS_CACHE : CacheBehavior::NEVER_CACHE;
|
|
141
|
+
return GetOrCreateInstance(database, config_dict, cache_behavior, on_create);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
shared_ptr<DuckDB> DBInstanceCache::GetOrCreateInstance(const string &database, DBConfig &config_dict,
|
|
145
|
+
CacheBehavior cache_behavior,
|
|
146
|
+
const std::function<void(DuckDB &)> &on_create) {
|
|
140
147
|
unique_lock<mutex> lock(cache_lock, std::defer_lock);
|
|
148
|
+
bool cache_instance = cache_behavior == CacheBehavior::ALWAYS_CACHE;
|
|
149
|
+
if (cache_behavior == CacheBehavior::AUTOMATIC) {
|
|
150
|
+
// cache all unnamed in-memory connections
|
|
151
|
+
cache_instance = true;
|
|
152
|
+
if (database == IN_MEMORY_PATH || database.empty()) {
|
|
153
|
+
cache_instance = false;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
141
156
|
if (cache_instance) {
|
|
142
|
-
|
|
143
157
|
// While we do not own the lock, we cannot definitively say that the database instance does not exist.
|
|
144
158
|
while (!lock.owns_lock()) {
|
|
145
159
|
// The problem is, that we have to unlock the mutex in GetInstanceInternal, so we can non-blockingly wait
|
|
@@ -10,6 +10,7 @@ static const ExtensionAlias internal_aliases[] = {{"http", "httpfs"}, // httpfs
|
|
|
10
10
|
{"postgres", "postgres_scanner"}, // postgres
|
|
11
11
|
{"sqlite", "sqlite_scanner"}, // sqlite
|
|
12
12
|
{"sqlite3", "sqlite_scanner"},
|
|
13
|
+
{"uc_catalog", "unity_catalog"}, // old name for compatibility
|
|
13
14
|
{nullptr, nullptr}};
|
|
14
15
|
|
|
15
16
|
idx_t ExtensionHelper::ExtensionAliasCount() {
|