duckdb 1.4.3-dev0.0 → 1.4.4-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/aggregate/holistic/approximate_quantile.cpp +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +14 -5
  4. package/src/duckdb/extension/parquet/column_writer.cpp +4 -4
  5. package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +12 -4
  6. package/src/duckdb/src/common/encryption_key_manager.cpp +4 -0
  7. package/src/duckdb/src/common/local_file_system.cpp +23 -0
  8. package/src/duckdb/src/common/types/column/column_data_collection.cpp +6 -0
  9. package/src/duckdb/src/common/types/conflict_manager.cpp +1 -1
  10. package/src/duckdb/src/execution/index/art/base_node.cpp +3 -1
  11. package/src/duckdb/src/execution/index/art/prefix.cpp +5 -8
  12. package/src/duckdb/src/execution/index/bound_index.cpp +68 -25
  13. package/src/duckdb/src/execution/index/unbound_index.cpp +21 -10
  14. package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +4 -0
  15. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +36 -28
  16. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +3 -2
  17. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +12 -6
  18. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +8 -4
  19. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +1 -1
  20. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +4 -3
  21. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +3 -2
  22. package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +0 -1
  23. package/src/duckdb/src/execution/physical_plan/plan_window.cpp +6 -8
  24. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +4 -3
  25. package/src/duckdb/src/function/macro_function.cpp +20 -2
  26. package/src/duckdb/src/function/table/system/duckdb_log.cpp +3 -0
  27. package/src/duckdb/src/function/table/system/test_all_types.cpp +26 -13
  28. package/src/duckdb/src/function/table/table_scan.cpp +72 -38
  29. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  30. package/src/duckdb/src/function/table_function.cpp +24 -0
  31. package/src/duckdb/src/include/duckdb/common/encryption_key_manager.hpp +1 -0
  32. package/src/duckdb/src/include/duckdb/common/limits.hpp +4 -2
  33. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/common/types/row/block_iterator.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/execution/index/art/art_operator.hpp +2 -0
  36. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
  37. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +1 -1
  38. package/src/duckdb/src/include/duckdb/execution/index/unbound_index.hpp +41 -7
  39. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +15 -1
  40. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -0
  41. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -1
  42. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +3 -1
  43. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
  44. package/src/duckdb/src/include/duckdb/function/table_function.hpp +2 -0
  45. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +5 -0
  46. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +2 -0
  47. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_manager.hpp +4 -4
  49. package/src/duckdb/src/include/duckdb/optimizer/rule/ordered_aggregate_optimizer.hpp +3 -1
  50. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +3 -0
  51. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
  52. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +4 -2
  53. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +1 -2
  54. package/src/duckdb/src/include/duckdb/planner/subquery/flatten_dependent_join.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/planner/subquery/rewrite_cte_scan.hpp +3 -1
  56. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +3 -3
  57. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +2 -6
  58. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +4 -1
  59. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
  60. package/src/duckdb/src/logging/log_storage.cpp +17 -23
  61. package/src/duckdb/src/main/capi/duckdb-c.cpp +1 -1
  62. package/src/duckdb/src/main/connection.cpp +0 -5
  63. package/src/duckdb/src/main/database_manager.cpp +12 -9
  64. package/src/duckdb/src/main/db_instance_cache.cpp +15 -1
  65. package/src/duckdb/src/main/extension/extension_alias.cpp +1 -0
  66. package/src/duckdb/src/optimizer/filter_combiner.cpp +38 -4
  67. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -15
  68. package/src/duckdb/src/optimizer/late_materialization.cpp +5 -0
  69. package/src/duckdb/src/optimizer/rule/ordered_aggregate_optimizer.cpp +6 -3
  70. package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +3 -2
  71. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +1 -1
  72. package/src/duckdb/src/planner/binder/query_node/plan_select_node.cpp +1 -1
  73. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +4 -1
  74. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +17 -10
  75. package/src/duckdb/src/planner/binder.cpp +3 -3
  76. package/src/duckdb/src/planner/bound_result_modifier.cpp +22 -5
  77. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +4 -1
  78. package/src/duckdb/src/planner/expression_binder/constant_binder.cpp +1 -1
  79. package/src/duckdb/src/planner/expression_binder.cpp +1 -2
  80. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +57 -24
  81. package/src/duckdb/src/planner/subquery/rewrite_cte_scan.cpp +5 -3
  82. package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +9 -0
  83. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  84. package/src/duckdb/src/storage/table/chunk_info.cpp +3 -3
  85. package/src/duckdb/src/storage/table/column_data.cpp +5 -1
  86. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +1 -1
  87. package/src/duckdb/src/storage/table/column_segment.cpp +3 -1
  88. package/src/duckdb/src/storage/table/row_group.cpp +6 -8
  89. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -1
  90. package/src/duckdb/src/storage/table/row_version_manager.cpp +37 -23
  91. package/src/duckdb/src/storage/table/standard_column_data.cpp +5 -5
  92. package/src/duckdb/src/storage/table/validity_column_data.cpp +17 -0
@@ -18,11 +18,43 @@ class ColumnDataCollection;
18
18
 
19
19
  enum class BufferedIndexReplay : uint8_t { INSERT_ENTRY = 0, DEL_ENTRY = 1 };
20
20
 
21
- struct BufferedIndexData {
21
+ struct ReplayRange {
22
22
  BufferedIndexReplay type;
23
- unique_ptr<ColumnDataCollection> data;
23
+ // [start, end) - start is inclusive, end is exclusive for the range within the ColumnDataCollection
24
+ // buffer for operations to replay for this range.
25
+ idx_t start;
26
+ idx_t end;
27
+ explicit ReplayRange(const BufferedIndexReplay replay_type, const idx_t start_p, const idx_t end_p)
28
+ : type(replay_type), start(start_p), end(end_p) {
29
+ }
30
+ };
31
+
32
+ // All inserts and deletes to be replayed are stored in their respective buffers.
33
+ // Since the inserts and deletes may be interleaved, however, ranges stores the ordering of operations
34
+ // and their offsets in the respective buffer.
35
+ // Simple example:
36
+ // ranges[0] - INSERT_ENTRY, [0,6)
37
+ // ranges[1] - DEL_ENTRY, [0,3)
38
+ // ranges[2] - INSERT_ENTRY, [6,12)
39
+ // So even though the buffered_inserts has all the insert data from [0,12), ranges gives us the intervals for
40
+ // replaying the index operations in the right order.
41
+ struct BufferedIndexReplays {
42
+ vector<ReplayRange> ranges;
43
+ unique_ptr<ColumnDataCollection> buffered_inserts;
44
+ unique_ptr<ColumnDataCollection> buffered_deletes;
45
+
46
+ BufferedIndexReplays() = default;
47
+
48
+ unique_ptr<ColumnDataCollection> &GetBuffer(const BufferedIndexReplay replay_type) {
49
+ if (replay_type == BufferedIndexReplay::INSERT_ENTRY) {
50
+ return buffered_inserts;
51
+ }
52
+ return buffered_deletes;
53
+ }
24
54
 
25
- BufferedIndexData(BufferedIndexReplay replay_type, unique_ptr<ColumnDataCollection> data_p);
55
+ bool HasBufferedReplays() const {
56
+ return !ranges.empty();
57
+ }
26
58
  };
27
59
 
28
60
  class UnboundIndex final : public Index {
@@ -31,8 +63,9 @@ private:
31
63
  unique_ptr<CreateInfo> create_info;
32
64
  //! The serialized storage information of the index.
33
65
  IndexStorageInfo storage_info;
34
- //! Buffer for WAL replays.
35
- vector<BufferedIndexData> buffered_replays;
66
+
67
+ //! Buffer for index operations during WAL replay. They are replayed upon index binding.
68
+ BufferedIndexReplays buffered_replays;
36
69
 
37
70
  //! Maps the column IDs in the buffered replays to a physical table offset.
38
71
  //! For example, column [i] in a buffered ColumnDataCollection is the data for an Indexed column with
@@ -78,12 +111,13 @@ public:
78
111
  void BufferChunk(DataChunk &index_column_chunk, Vector &row_ids, const vector<StorageIndex> &mapped_column_ids_p,
79
112
  BufferedIndexReplay replay_type);
80
113
  bool HasBufferedReplays() const {
81
- return !buffered_replays.empty();
114
+ return buffered_replays.HasBufferedReplays();
82
115
  }
83
116
 
84
- vector<BufferedIndexData> &GetBufferedReplays() {
117
+ BufferedIndexReplays &GetBufferedReplays() {
85
118
  return buffered_replays;
86
119
  }
120
+
87
121
  const vector<StorageIndex> &GetMappedColumnIds() const {
88
122
  return mapped_column_ids;
89
123
  }
@@ -121,6 +121,8 @@ public:
121
121
 
122
122
  virtual ~BaseScanner() = default;
123
123
 
124
+ void Print() const;
125
+
124
126
  //! Returns true if the scanner is finished
125
127
  bool FinishedFile() const;
126
128
 
@@ -164,10 +166,15 @@ public:
164
166
  //! States
165
167
  CSVStates states;
166
168
 
169
+ //! If the scanner ever entered a quoted state
167
170
  bool ever_quoted = false;
168
171
 
172
+ //! If the scanner ever entered an escaped state.
169
173
  bool ever_escaped = false;
170
174
 
175
+ //! If the scanner ever took advantage of the non-strict mode.
176
+ bool used_unstrictness = false;
177
+
171
178
  //! Shared pointer to the buffer_manager, this is shared across multiple scanners
172
179
  shared_ptr<CSVBufferManager> buffer_manager;
173
180
 
@@ -302,6 +309,9 @@ protected:
302
309
  !state_machine->dialect_options.state_machine_options.strict_mode.GetValue())) {
303
310
  // We only set the ever escaped variable if this is either a quote char OR strict mode is off
304
311
  ever_escaped = true;
312
+ if (states.states[0] == CSVState::UNQUOTED_ESCAPE) {
313
+ used_unstrictness = true;
314
+ }
305
315
  }
306
316
  ever_quoted = true;
307
317
  T::SetQuoted(result, iterator.pos.buffer_pos);
@@ -332,11 +342,15 @@ protected:
332
342
  break;
333
343
  }
334
344
  case CSVState::ESCAPE:
335
- case CSVState::UNQUOTED_ESCAPE:
336
345
  case CSVState::ESCAPED_RETURN:
337
346
  T::SetEscaped(result);
338
347
  iterator.pos.buffer_pos++;
339
348
  break;
349
+ case CSVState::UNQUOTED_ESCAPE:
350
+ T::SetEscaped(result);
351
+ iterator.pos.buffer_pos++;
352
+ used_unstrictness = true;
353
+ break;
340
354
  case CSVState::STANDARD: {
341
355
  iterator.pos.buffer_pos++;
342
356
  while (iterator.pos.buffer_pos + 8 < to_pos) {
@@ -116,6 +116,7 @@ private:
116
116
  //! Highest number of columns found
117
117
  idx_t max_columns_found = 0;
118
118
  idx_t max_columns_found_error = 0;
119
+ bool best_candidate_is_strict = false;
119
120
  //! Current Candidates being considered
120
121
  vector<unique_ptr<ColumnCountScanner>> candidates;
121
122
  //! Reference to original CSV Options, it will be modified as a result of the sniffer.
@@ -176,7 +176,7 @@ public:
176
176
  const shared_ptr<CSVBufferHandle> &buffer_handle, Allocator &buffer_allocator,
177
177
  idx_t result_size_p, idx_t buffer_position, CSVErrorHandler &error_handler, CSVIterator &iterator,
178
178
  bool store_line_size, shared_ptr<CSVFileScan> csv_file_scan, idx_t &lines_read, bool sniffing,
179
- string path, idx_t scan_id);
179
+ const string &path, idx_t scan_id, bool &used_unstrictness);
180
180
 
181
181
  ~StringValueResult();
182
182
 
@@ -225,6 +225,7 @@ public:
225
225
 
226
226
  shared_ptr<CSVFileScan> csv_file_scan;
227
227
  idx_t &lines_read;
228
+ bool &used_unstrictness;
228
229
  //! Information regarding projected columns
229
230
  unsafe_unique_array<bool> projected_columns;
230
231
  bool projecting_columns = false;
@@ -10,6 +10,7 @@
10
10
 
11
11
  #include "duckdb/common/common.hpp"
12
12
  #include "duckdb/execution/physical_operator.hpp"
13
+ #include "duckdb/parser/group_by_node.hpp"
13
14
  #include "duckdb/planner/logical_operator.hpp"
14
15
  #include "duckdb/planner/logical_tokens.hpp"
15
16
  #include "duckdb/planner/joinside.hpp"
@@ -152,7 +153,8 @@ protected:
152
153
  PhysicalOperator &PlanComparisonJoin(LogicalComparisonJoin &op);
153
154
  PhysicalOperator &PlanDelimJoin(LogicalComparisonJoin &op);
154
155
  PhysicalOperator &ExtractAggregateExpressions(PhysicalOperator &child, vector<unique_ptr<Expression>> &expressions,
155
- vector<unique_ptr<Expression>> &groups);
156
+ vector<unique_ptr<Expression>> &groups,
157
+ optional_ptr<vector<GroupingSet>> grouping_sets);
156
158
 
157
159
  private:
158
160
  ClientContext &context;
@@ -70,7 +70,8 @@ public:
70
70
  AggregateType aggr_type = AggregateType::NON_DISTINCT);
71
71
 
72
72
  DUCKDB_API static void BindSortedAggregate(ClientContext &context, BoundAggregateExpression &expr,
73
- const vector<unique_ptr<Expression>> &groups);
73
+ const vector<unique_ptr<Expression>> &groups,
74
+ optional_ptr<vector<GroupingSet>> grouping_sets);
74
75
  DUCKDB_API static void BindSortedAggregate(ClientContext &context, BoundWindowExpression &expr);
75
76
 
76
77
  //! Cast a set of expressions to the arguments of this function
@@ -432,6 +432,8 @@ public:
432
432
  TableFunctionInitialization global_initialization = TableFunctionInitialization::INITIALIZE_ON_EXECUTE;
433
433
 
434
434
  DUCKDB_API bool Equal(const TableFunction &rhs) const;
435
+ DUCKDB_API bool operator==(const TableFunction &rhs) const;
436
+ DUCKDB_API bool operator!=(const TableFunction &rhs) const;
435
437
  };
436
438
 
437
439
  } // namespace duckdb
@@ -26,6 +26,8 @@ struct DatabaseCacheEntry {
26
26
  mutex update_database_mutex;
27
27
  };
28
28
 
29
+ enum class CacheBehavior { AUTOMATIC, ALWAYS_CACHE, NEVER_CACHE };
30
+
29
31
  class DBInstanceCache {
30
32
  public:
31
33
  DBInstanceCache();
@@ -41,6 +43,9 @@ public:
41
43
  //! Either returns an existing entry, or creates and caches a new DB Instance
42
44
  shared_ptr<DuckDB> GetOrCreateInstance(const string &database, DBConfig &config_dict, bool cache_instance,
43
45
  const std::function<void(DuckDB &)> &on_create = nullptr);
46
+ shared_ptr<DuckDB> GetOrCreateInstance(const string &database, DBConfig &config_dict,
47
+ CacheBehavior cache_behavior = CacheBehavior::AUTOMATIC,
48
+ const std::function<void(DuckDB &)> &on_create = nullptr);
44
49
 
45
50
  private:
46
51
  shared_ptr<DatabaseFilePathManager> path_manager;
@@ -1045,6 +1045,8 @@ static constexpr ExtensionEntry EXTENSION_SETTINGS[] = {
1045
1045
  {"http_retry_wait_ms", "httpfs"},
1046
1046
  {"http_timeout", "httpfs"},
1047
1047
  {"httpfs_client_implementation", "httpfs"},
1048
+ {"iceberg_via_aws_sdk_for_catalog_interactions", "iceberg"},
1049
+ {"merge_http_secret_into_s3_request", "httpfs"},
1048
1050
  {"mysql_bit1_as_boolean", "mysql_scanner"},
1049
1051
  {"mysql_debug_show_queries", "mysql_scanner"},
1050
1052
  {"mysql_experimental_filter_pushdown", "mysql_scanner"},
@@ -50,6 +50,7 @@ public:
50
50
  //! If this returns true - this sorts "in_list" as a side-effect
51
51
  static bool IsDenseRange(vector<Value> &in_list);
52
52
  static bool ContainsNull(vector<Value> &in_list);
53
+ static bool FindNextLegalUTF8(string &prefix_string);
53
54
 
54
55
  void GenerateFilters(const std::function<void(unique_ptr<Expression> filter)> &callback);
55
56
  bool HasFilters();
@@ -57,10 +57,10 @@ public:
57
57
  bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
58
58
  void AddRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);
59
59
  //! Add an unnest relation which can come from a logical unnest or a logical get which has an unnest function
60
- void AddUnnestRelation(JoinOrderOptimizer &optimizer, LogicalOperator &op, LogicalOperator &input_op,
61
- optional_ptr<LogicalOperator> parent, RelationStats &child_stats,
62
- optional_ptr<LogicalOperator> limit_op,
63
- vector<reference<LogicalOperator>> &datasource_filters);
60
+ void AddRelationWithChildren(JoinOrderOptimizer &optimizer, LogicalOperator &op, LogicalOperator &input_op,
61
+ optional_ptr<LogicalOperator> parent, RelationStats &child_stats,
62
+ optional_ptr<LogicalOperator> limit_op,
63
+ vector<reference<LogicalOperator>> &datasource_filters);
64
64
  void AddAggregateOrWindowRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent,
65
65
  const RelationStats &stats, LogicalOperatorType op_type);
66
66
  vector<unique_ptr<SingleJoinRelation>> GetRelations();
@@ -10,6 +10,7 @@
10
10
 
11
11
  #include "duckdb/optimizer/rule.hpp"
12
12
  #include "duckdb/parser/expression_map.hpp"
13
+ #include "duckdb/parser/group_by_node.hpp"
13
14
 
14
15
  namespace duckdb {
15
16
 
@@ -18,7 +19,8 @@ public:
18
19
  explicit OrderedAggregateOptimizer(ExpressionRewriter &rewriter);
19
20
 
20
21
  static unique_ptr<Expression> Apply(ClientContext &context, BoundAggregateExpression &aggr,
21
- vector<unique_ptr<Expression>> &groups, bool &changes_made);
22
+ vector<unique_ptr<Expression>> &groups,
23
+ optional_ptr<vector<GroupingSet>> grouping_sets, bool &changes_made);
22
24
  unique_ptr<Expression> Apply(LogicalOperator &op, vector<reference<Expression>> &bindings, bool &changes_made,
23
25
  bool is_root) override;
24
26
  };
@@ -23,6 +23,9 @@ enum class SampleMethod : uint8_t { SYSTEM_SAMPLE = 0, BERNOULLI_SAMPLE = 1, RES
23
23
  string SampleMethodToString(SampleMethod method);
24
24
 
25
25
  class SampleOptions {
26
+ public:
27
+ // 1 billion rows should be enough.
28
+ static constexpr idx_t MAX_SAMPLE_ROWS = 1000000000;
26
29
 
27
30
  public:
28
31
  explicit SampleOptions(int64_t seed_ = -1);
@@ -405,7 +405,7 @@ private:
405
405
 
406
406
  unique_ptr<QueryNode> BindTableMacro(FunctionExpression &function, TableMacroCatalogEntry &macro_func, idx_t depth);
407
407
 
408
- unique_ptr<BoundCTENode> BindMaterializedCTE(CommonTableExpressionMap &cte_map);
408
+ unique_ptr<BoundCTENode> BindMaterializedCTE(CommonTableExpressionMap &cte_map, unique_ptr<CTENode> &cte_root);
409
409
  unique_ptr<BoundCTENode> BindCTE(CTENode &statement);
410
410
 
411
411
  unique_ptr<BoundQueryNode> BindNode(SelectNode &node);
@@ -9,6 +9,7 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/limits.hpp"
12
+ #include "duckdb/parser/group_by_node.hpp"
12
13
  #include "duckdb/parser/result_modifier.hpp"
13
14
  #include "duckdb/planner/bound_statement.hpp"
14
15
  #include "duckdb/planner/expression.hpp"
@@ -155,8 +156,9 @@ public:
155
156
 
156
157
  //! Remove unneeded/duplicate order elements.
157
158
  //! Returns true if orders is not empty.
158
- static bool Simplify(vector<BoundOrderByNode> &orders, const vector<unique_ptr<Expression>> &groups);
159
- bool Simplify(const vector<unique_ptr<Expression>> &groups);
159
+ static bool Simplify(vector<BoundOrderByNode> &orders, const vector<unique_ptr<Expression>> &groups,
160
+ optional_ptr<vector<GroupingSet>> grouping_sets);
161
+ bool Simplify(const vector<unique_ptr<Expression>> &groups, optional_ptr<vector<GroupingSet>> grouping_sets);
160
162
  };
161
163
 
162
164
  enum class DistinctType : uint8_t { DISTINCT = 0, DISTINCT_ON = 1 };
@@ -10,9 +10,8 @@
10
10
 
11
11
  #include "duckdb/common/exception.hpp"
12
12
  #include "duckdb/common/stack_checker.hpp"
13
- #include "duckdb/common/exception/binder_exception.hpp"
14
13
  #include "duckdb/common/error_data.hpp"
15
- #include "duckdb/common/unordered_map.hpp"
14
+ #include "duckdb/common/exception/binder_exception.hpp"
16
15
  #include "duckdb/parser/expression/bound_expression.hpp"
17
16
  #include "duckdb/parser/expression/lambdaref_expression.hpp"
18
17
  #include "duckdb/parser/parsed_expression.hpp"
@@ -33,7 +33,7 @@ private:
33
33
  bool parent_is_dependent_join = false);
34
34
 
35
35
  //! Mark entire subtree of Logical Operators as correlated by adding them to the has_correlated_expressions map.
36
- bool MarkSubtreeCorrelated(LogicalOperator &op);
36
+ bool MarkSubtreeCorrelated(LogicalOperator &op, idx_t cte_index);
37
37
 
38
38
  //! Push the dependent join down a LogicalOperator
39
39
  unique_ptr<LogicalOperator> PushDownDependentJoin(unique_ptr<LogicalOperator> plan,
@@ -17,13 +17,15 @@ namespace duckdb {
17
17
  //! Helper class to rewrite correlated cte scans within a single LogicalOperator
18
18
  class RewriteCTEScan : public LogicalOperatorVisitor {
19
19
  public:
20
- RewriteCTEScan(idx_t table_index, const CorrelatedColumns &correlated_columns);
20
+ RewriteCTEScan(idx_t table_index, const CorrelatedColumns &correlated_columns,
21
+ bool rewrite_dependent_joins = false);
21
22
 
22
23
  void VisitOperator(LogicalOperator &op) override;
23
24
 
24
25
  private:
25
26
  idx_t table_index;
26
27
  const CorrelatedColumns &correlated_columns;
28
+ bool rewrite_dependent_joins = false;
27
29
  };
28
30
 
29
31
  } // namespace duckdb
@@ -45,7 +45,7 @@ public:
45
45
  virtual bool Fetch(TransactionData transaction, row_t row) = 0;
46
46
  virtual void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) = 0;
47
47
  virtual idx_t GetCommittedDeletedCount(idx_t max_count) = 0;
48
- virtual bool Cleanup(transaction_t lowest_transaction, unique_ptr<ChunkInfo> &result) const;
48
+ virtual bool Cleanup(transaction_t lowest_transaction) const;
49
49
 
50
50
  virtual bool HasDeletes() const = 0;
51
51
 
@@ -87,7 +87,7 @@ public:
87
87
  bool Fetch(TransactionData transaction, row_t row) override;
88
88
  void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override;
89
89
  idx_t GetCommittedDeletedCount(idx_t max_count) override;
90
- bool Cleanup(transaction_t lowest_transaction, unique_ptr<ChunkInfo> &result) const override;
90
+ bool Cleanup(transaction_t lowest_transaction) const override;
91
91
 
92
92
  bool HasDeletes() const override;
93
93
 
@@ -124,7 +124,7 @@ public:
124
124
  SelectionVector &sel_vector, idx_t max_count) override;
125
125
  bool Fetch(TransactionData transaction, row_t row) override;
126
126
  void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override;
127
- bool Cleanup(transaction_t lowest_transaction, unique_ptr<ChunkInfo> &result) const override;
127
+ bool Cleanup(transaction_t lowest_transaction) const override;
128
128
  idx_t GetCommittedDeletedCount(idx_t max_count) override;
129
129
 
130
130
  void Append(idx_t start, idx_t end, transaction_t commit_id);
@@ -99,10 +99,6 @@ public:
99
99
 
100
100
  const vector<MetaBlockPointer> &GetColumnStartPointers() const;
101
101
 
102
- //! Returns the list of meta block pointers used by the deletes
103
- const vector<MetaBlockPointer> &GetDeletesPointers() const {
104
- return deletes_pointers;
105
- }
106
102
  BlockManager &GetBlockManager();
107
103
  DataTableInfo &GetTableInfo();
108
104
 
@@ -198,6 +194,8 @@ public:
198
194
 
199
195
  static FilterPropagateResult CheckRowIdFilter(const TableFilter &filter, idx_t beg_row, idx_t end_row);
200
196
 
197
+ vector<MetaBlockPointer> CheckpointDeletes(MetadataManager &manager);
198
+
201
199
  private:
202
200
  optional_ptr<RowVersionManager> GetVersionInfo();
203
201
  shared_ptr<RowVersionManager> GetOrCreateVersionInfoPtr();
@@ -214,8 +212,6 @@ private:
214
212
  template <TableScanType TYPE>
215
213
  void TemplatedScan(TransactionData transaction, CollectionScanState &state, DataChunk &result);
216
214
 
217
- vector<MetaBlockPointer> CheckpointDeletes(MetadataManager &manager);
218
-
219
215
  bool HasUnloadedDeletes() const;
220
216
 
221
217
  private:
@@ -46,11 +46,14 @@ public:
46
46
  static shared_ptr<RowVersionManager> Deserialize(MetaBlockPointer delete_pointer, MetadataManager &manager,
47
47
  idx_t start);
48
48
 
49
+ bool HasUnserializedChanges();
50
+ vector<MetaBlockPointer> GetStoragePointers();
51
+
49
52
  private:
50
53
  mutex version_lock;
51
54
  idx_t start;
52
55
  vector<unique_ptr<ChunkInfo>> vector_info;
53
- bool has_changes;
56
+ bool has_unserialized_changes;
54
57
  vector<MetaBlockPointer> storage_pointers;
55
58
 
56
59
  private:
@@ -23,6 +23,8 @@ public:
23
23
  public:
24
24
  FilterPropagateResult CheckZonemap(ColumnScanState &state, TableFilter &filter) override;
25
25
  void AppendData(BaseStatistics &stats, ColumnAppendState &state, UnifiedVectorFormat &vdata, idx_t count) override;
26
+ void UpdateWithBase(TransactionData transaction, DataTable &data_table, idx_t column_index, Vector &update_vector,
27
+ row_t *row_ids, idx_t update_count, ColumnData &base);
26
28
  };
27
29
 
28
30
  } // namespace duckdb
@@ -22,26 +22,19 @@ namespace duckdb {
22
22
 
23
23
  vector<LogicalType> LogStorage::GetSchema(LoggingTargetTable table) {
24
24
  switch (table) {
25
- case LoggingTargetTable::ALL_LOGS:
26
- return {
27
- LogicalType::UBIGINT, // context_id
28
- LogicalType::VARCHAR, // scope
29
- LogicalType::UBIGINT, // connection_id
30
- LogicalType::UBIGINT, // transaction_id
31
- LogicalType::UBIGINT, // query_id
32
- LogicalType::UBIGINT, // thread
33
- LogicalType::TIMESTAMP, // timestamp
34
- LogicalType::VARCHAR, // log_type
35
- LogicalType::VARCHAR, // level
36
- LogicalType::VARCHAR, // message
37
- };
25
+ case LoggingTargetTable::ALL_LOGS: {
26
+ auto all_logs = GetSchema(LoggingTargetTable::LOG_CONTEXTS);
27
+ auto log_entries = GetSchema(LoggingTargetTable::LOG_ENTRIES);
28
+ all_logs.insert(all_logs.end(), log_entries.begin() + 1, log_entries.end());
29
+ return all_logs;
30
+ }
38
31
  case LoggingTargetTable::LOG_ENTRIES:
39
32
  return {
40
- LogicalType::UBIGINT, // context_id
41
- LogicalType::TIMESTAMP, // timestamp
42
- LogicalType::VARCHAR, // log_type
43
- LogicalType::VARCHAR, // level
44
- LogicalType::VARCHAR, // message
33
+ LogicalType::UBIGINT, // context_id
34
+ LogicalType::TIMESTAMP_TZ, // timestamp
35
+ LogicalType::VARCHAR, // log_type
36
+ LogicalType::VARCHAR, // level
37
+ LogicalType::VARCHAR, // message
45
38
  };
46
39
  case LoggingTargetTable::LOG_CONTEXTS:
47
40
  return {
@@ -59,11 +52,12 @@ vector<LogicalType> LogStorage::GetSchema(LoggingTargetTable table) {
59
52
 
60
53
  vector<string> LogStorage::GetColumnNames(LoggingTargetTable table) {
61
54
  switch (table) {
62
- case LoggingTargetTable::ALL_LOGS:
63
- return {
64
- "context_id", "scope", "connection_id", "transaction_id", "query_id",
65
- "thread_id", "timestamp", "type", "log_level", "message",
66
- };
55
+ case LoggingTargetTable::ALL_LOGS: {
56
+ auto all_logs = GetColumnNames(LoggingTargetTable::LOG_CONTEXTS);
57
+ auto log_entries = GetColumnNames(LoggingTargetTable::LOG_ENTRIES);
58
+ all_logs.insert(all_logs.end(), log_entries.begin() + 1, log_entries.end());
59
+ return all_logs;
60
+ }
67
61
  case LoggingTargetTable::LOG_ENTRIES:
68
62
  return {"context_id", "timestamp", "type", "log_level", "message"};
69
63
  case LoggingTargetTable::LOG_CONTEXTS:
@@ -41,7 +41,7 @@ duckdb_state duckdb_open_internal(DBInstanceCacheWrapper *cache, const char *pat
41
41
  if (path) {
42
42
  path_str = path;
43
43
  }
44
- wrapper->database = cache->instance_cache->GetOrCreateInstance(path_str, *db_config, true);
44
+ wrapper->database = cache->instance_cache->GetOrCreateInstance(path_str, *db_config);
45
45
  } else {
46
46
  wrapper->database = duckdb::make_shared_ptr<DuckDB>(path, db_config);
47
47
  }
@@ -23,11 +23,6 @@ Connection::Connection(DatabaseInstance &database)
23
23
  auto &connection_manager = ConnectionManager::Get(database);
24
24
  connection_manager.AddConnection(*context);
25
25
  connection_manager.AssignConnectionId(*this);
26
-
27
- #ifdef DEBUG
28
- EnableProfiling();
29
- context->config.emit_profiler_output = false;
30
- #endif
31
26
  }
32
27
 
33
28
  Connection::Connection(DuckDB &database) : Connection(*database.instance) {
@@ -83,7 +83,16 @@ shared_ptr<AttachedDatabase> DatabaseManager::GetDatabaseInternal(const lock_gua
83
83
 
84
84
  shared_ptr<AttachedDatabase> DatabaseManager::AttachDatabase(ClientContext &context, AttachInfo &info,
85
85
  AttachOptions &options) {
86
- auto &config = DBConfig::GetConfig(context);
86
+ string extension = "";
87
+ if (FileSystem::IsRemoteFile(info.path, extension)) {
88
+ if (options.access_mode == AccessMode::AUTOMATIC) {
89
+ // Attaching of remote files gets bumped to READ_ONLY
90
+ // This is due to the fact that on most (all?) remote files writes to DB are not available
91
+ // and having this raised later is not super helpful
92
+ options.access_mode = AccessMode::READ_ONLY;
93
+ }
94
+ }
95
+
87
96
  if (options.db_type.empty() || StringUtil::CIEquals(options.db_type, "duckdb")) {
88
97
  while (InsertDatabasePath(info, options) == InsertDatabasePathResult::ALREADY_EXISTS) {
89
98
  // database with this name and path already exists
@@ -99,6 +108,7 @@ shared_ptr<AttachedDatabase> DatabaseManager::AttachDatabase(ClientContext &cont
99
108
  }
100
109
  }
101
110
  }
111
+ auto &config = DBConfig::GetConfig(context);
102
112
  GetDatabaseType(context, info, config, options);
103
113
  if (!options.db_type.empty()) {
104
114
  // we only need to prevent duplicate opening of DuckDB files
@@ -108,18 +118,11 @@ shared_ptr<AttachedDatabase> DatabaseManager::AttachDatabase(ClientContext &cont
108
118
  if (AttachedDatabase::NameIsReserved(info.name)) {
109
119
  throw BinderException("Attached database name \"%s\" cannot be used because it is a reserved name", info.name);
110
120
  }
111
- string extension = "";
112
- if (FileSystem::IsRemoteFile(info.path, extension)) {
121
+ if (!extension.empty()) {
113
122
  if (!ExtensionHelper::TryAutoLoadExtension(context, extension)) {
114
123
  throw MissingExtensionException("Attaching path '%s' requires extension '%s' to be loaded", info.path,
115
124
  extension);
116
125
  }
117
- if (options.access_mode == AccessMode::AUTOMATIC) {
118
- // Attaching of remote files gets bumped to READ_ONLY
119
- // This is due to the fact that on most (all?) remote files writes to DB are not available
120
- // and having this raised later is not super helpful
121
- options.access_mode = AccessMode::READ_ONLY;
122
- }
123
126
  }
124
127
 
125
128
  // now create the attached database
@@ -137,9 +137,23 @@ shared_ptr<DuckDB> DBInstanceCache::CreateInstance(const string &database, DBCon
137
137
  shared_ptr<DuckDB> DBInstanceCache::GetOrCreateInstance(const string &database, DBConfig &config_dict,
138
138
  bool cache_instance,
139
139
  const std::function<void(DuckDB &)> &on_create) {
140
+ auto cache_behavior = cache_instance ? CacheBehavior::ALWAYS_CACHE : CacheBehavior::NEVER_CACHE;
141
+ return GetOrCreateInstance(database, config_dict, cache_behavior, on_create);
142
+ }
143
+
144
+ shared_ptr<DuckDB> DBInstanceCache::GetOrCreateInstance(const string &database, DBConfig &config_dict,
145
+ CacheBehavior cache_behavior,
146
+ const std::function<void(DuckDB &)> &on_create) {
140
147
  unique_lock<mutex> lock(cache_lock, std::defer_lock);
148
+ bool cache_instance = cache_behavior == CacheBehavior::ALWAYS_CACHE;
149
+ if (cache_behavior == CacheBehavior::AUTOMATIC) {
150
+ // cache all instances except unnamed in-memory databases
151
+ cache_instance = true;
152
+ if (database == IN_MEMORY_PATH || database.empty()) {
153
+ cache_instance = false;
154
+ }
155
+ }
141
156
  if (cache_instance) {
142
-
143
157
  // While we do not own the lock, we cannot definitively say that the database instance does not exist.
144
158
  while (!lock.owns_lock()) {
145
159
  // The problem is, that we have to unlock the mutex in GetInstanceInternal, so we can non-blockingly wait
@@ -10,6 +10,7 @@ static const ExtensionAlias internal_aliases[] = {{"http", "httpfs"}, // httpfs
10
10
  {"postgres", "postgres_scanner"}, // postgres
11
11
  {"sqlite", "sqlite_scanner"}, // sqlite
12
12
  {"sqlite3", "sqlite_scanner"},
13
+ {"uc_catalog", "unity_catalog"}, // old name for compatibility
13
14
  {nullptr, nullptr}};
14
15
 
15
16
  idx_t ExtensionHelper::ExtensionAliasCount() {