duckdb 0.7.2-dev3402.0 → 0.7.2-dev3441.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.7.2-dev3402.0",
5
+ "version": "0.7.2-dev3441.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -721,9 +721,9 @@ TableStorageInfo DuckTableEntry::GetStorageInfo(ClientContext &context) {
721
721
  storage->info->indexes.Scan([&](Index &index) {
722
722
  IndexInfo info;
723
723
  info.is_primary = index.IsPrimary();
724
- info.is_unique = index.IsUnique();
724
+ info.is_unique = index.IsUnique() || info.is_primary;
725
725
  info.is_foreign = index.IsForeign();
726
- index.column_id_set = index.column_id_set;
726
+ info.column_set = index.column_id_set;
727
727
  result.index_info.push_back(std::move(info));
728
728
  return false;
729
729
  });
@@ -621,7 +621,7 @@ void CatalogSet::Undo(CatalogEntry &entry) {
621
621
  auto &dependency_manager = catalog.GetDependencyManager();
622
622
  dependency_manager.EraseObject(to_be_removed_node);
623
623
  }
624
- if (entry.name != to_be_removed_node.name) {
624
+ if (!StringUtil::CIEquals(entry.name, to_be_removed_node.name)) {
625
625
  // rename: clean up the new name when the rename is rolled back
626
626
  auto removed_entry = mapping.find(to_be_removed_node.name);
627
627
  if (removed_entry->second->child) {
@@ -60,11 +60,15 @@ static void AppendFilteredToResult(Vector &lambda_vector, list_entry_t *result_e
60
60
 
61
61
  idx_t true_count = 0;
62
62
  SelectionVector true_sel(elem_cnt);
63
- auto lambda_values = FlatVector::GetData<bool>(lambda_vector);
64
- auto &lambda_validity = FlatVector::Validity(lambda_vector);
63
+ UnifiedVectorFormat lambda_data;
64
+ lambda_vector.ToUnifiedFormat(elem_cnt, lambda_data);
65
+
66
+ auto lambda_values = (bool *)lambda_data.data;
67
+ auto &lambda_validity = lambda_data.validity;
65
68
 
66
69
  // compute the new lengths and offsets, and create a selection vector
67
70
  for (idx_t i = 0; i < elem_cnt; i++) {
71
+ auto entry = lambda_data.sel->get_index(i);
68
72
 
69
73
  while (appended_lists_cnt < lists_len.size() && lists_len[appended_lists_cnt] == 0) {
70
74
  result_entries[appended_lists_cnt].offset = curr_list_offset;
@@ -73,12 +77,11 @@ static void AppendFilteredToResult(Vector &lambda_vector, list_entry_t *result_e
73
77
  }
74
78
 
75
79
  // found a true value
76
- if (lambda_validity.RowIsValid(i)) {
77
- if (lambda_values[i] > 0) {
78
- true_sel.set_index(true_count++, i);
79
- curr_list_len++;
80
- }
80
+ if (lambda_validity.RowIsValid(entry) && lambda_values[entry] > 0) {
81
+ true_sel.set_index(true_count++, i);
82
+ curr_list_len++;
81
83
  }
84
+
82
85
  curr_original_list_len++;
83
86
 
84
87
  if (lists_len[appended_lists_cnt] == curr_original_list_len) {
@@ -74,6 +74,9 @@ static idx_t BetweenLoopTypeSwitch(Vector &input, Vector &lower, Vector &upper,
74
74
  case PhysicalType::VARCHAR:
75
75
  return TernaryExecutor::Select<string_t, string_t, string_t, OP>(input, lower, upper, sel, count, true_sel,
76
76
  false_sel);
77
+ case PhysicalType::INTERVAL:
78
+ return TernaryExecutor::Select<interval_t, interval_t, interval_t, OP>(input, lower, upper, sel, count,
79
+ true_sel, false_sel);
77
80
  default:
78
81
  throw InvalidTypeException(input.GetType(), "Invalid type for BETWEEN");
79
82
  }
@@ -213,6 +213,8 @@ OperatorResultType PhysicalIndexJoin::ExecuteInternal(ExecutionContext &context,
213
213
  state.lhs_idx = 0;
214
214
  state.rhs_idx = 0;
215
215
  state.first_fetch = true;
216
+ // reset the LHS chunk to reset the validity masks
217
+ state.join_keys.Reset();
216
218
  return OperatorResultType::NEED_MORE_INPUT;
217
219
  }
218
220
  //! Output vectors
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.2-dev3402"
2
+ #define DUCKDB_VERSION "0.7.2-dev3441"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "6f543cb464"
5
+ #define DUCKDB_SOURCE_ID "e97702367a"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -39,6 +39,7 @@ class LogicalProjection;
39
39
  class ColumnList;
40
40
  class ExternalDependency;
41
41
  class TableFunction;
42
+ class TableStorageInfo;
42
43
 
43
44
  struct CreateInfo;
44
45
  struct BoundCreateTableInfo;
@@ -167,8 +168,8 @@ public:
167
168
  unique_ptr<LogicalOperator> BindUpdateSet(LogicalOperator &op, unique_ptr<LogicalOperator> root,
168
169
  UpdateSetInfo &set_info, TableCatalogEntry &table,
169
170
  vector<PhysicalIndex> &columns);
170
- void BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert *insert, UpdateSetInfo &set_info,
171
- TableCatalogEntry &table);
171
+ void BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
172
+ TableCatalogEntry &table, TableStorageInfo &storage_info);
172
173
  void BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &table, InsertStatement &stmt);
173
174
 
174
175
  static void BindSchemaOrCatalog(ClientContext &context, string &catalog, string &schema);
@@ -8,7 +8,9 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include "duckdb/storage/storage_info.hpp"
11
12
  #include "duckdb/common/types/value.hpp"
13
+ #include "duckdb/common/unordered_set.hpp"
12
14
 
13
15
  namespace duckdb {
14
16
 
@@ -1013,6 +1013,7 @@ void ClientContext::TryBindRelation(Relation &relation, vector<ColumnDefinition>
1013
1013
  D_ASSERT(!relation.GetAlias().empty());
1014
1014
  D_ASSERT(!relation.ToString().empty());
1015
1015
  #endif
1016
+ client_data->http_state = make_uniq<HTTPState>();
1016
1017
  RunFunctionInTransaction([&]() {
1017
1018
  // bind the expressions
1018
1019
  auto binder = Binder::CreateBinder(*this);
@@ -574,7 +574,7 @@ void CardinalityEstimator::EstimateBaseTableCardinality(JoinNode &node, LogicalO
574
574
  D_ASSERT(node.set.count == 1);
575
575
  auto relation_id = node.set.relations[0];
576
576
 
577
- double lowest_card_found = NumericLimits<double>::Maximum();
577
+ double lowest_card_found = node.GetBaseTableCardinality();
578
578
  for (auto &column : relation_attributes[relation_id].columns) {
579
579
  auto card_after_filters = node.GetBaseTableCardinality();
580
580
  ColumnBinding key = ColumnBinding(relation_id, column);
@@ -147,6 +147,18 @@ bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op,
147
147
  }
148
148
  }
149
149
  }
150
+ if (op->type == LogicalOperatorType::LOGICAL_ANY_JOIN && non_reorderable_operation) {
151
+ auto &join = op->Cast<LogicalAnyJoin>();
152
+ if (join.join_type == JoinType::LEFT && join.right_projection_map.empty()) {
153
+ auto lhs_cardinality = join.children[0]->EstimateCardinality(context);
154
+ auto rhs_cardinality = join.children[1]->EstimateCardinality(context);
155
+ if (rhs_cardinality > lhs_cardinality * 2) {
156
+ join.join_type = JoinType::RIGHT;
157
+ std::swap(join.children[0], join.children[1]);
158
+ }
159
+ }
160
+ }
161
+
150
162
  if (non_reorderable_operation) {
151
163
  // we encountered a non-reorderable operation (setop or non-inner join)
152
164
  // we do not reorder non-inner joins yet, however we do want to expand the potential join graph around them
@@ -648,7 +660,19 @@ void JoinOrderOptimizer::SolveJoinOrderApproximately() {
648
660
  // we have to add a cross product; we add it between the two smallest relations
649
661
  optional_ptr<JoinNode> smallest_plans[2];
650
662
  idx_t smallest_index[2];
651
- for (idx_t i = 0; i < join_relations.size(); i++) {
663
+ D_ASSERT(join_relations.size() >= 2);
664
+
665
+ // first just add the first two join relations. The cost doesn't matter, as the JOO
666
+ // will swap them on estimated cardinality anyway.
667
+ for (idx_t i = 0; i < 2; i++) {
668
+ auto current_plan = plans[&join_relations[i].get()].get();
669
+ smallest_plans[i] = current_plan;
670
+ smallest_index[i] = i;
671
+ }
672
+
673
+ // if there are any other join relations that don't have connections
674
+ // add them if they have lower estimated cardinality.
675
+ for (idx_t i = 2; i < join_relations.size(); i++) {
652
676
  // get the plan for this relation
653
677
  auto current_plan = plans[&join_relations[i].get()].get();
654
678
  // check if the cardinality is smaller than the smallest two found so far
@@ -25,6 +25,7 @@
25
25
  #include "duckdb/planner/tableref/bound_basetableref.hpp"
26
26
  #include "duckdb/planner/tableref/bound_dummytableref.hpp"
27
27
  #include "duckdb/parser/parsed_expression_iterator.hpp"
28
+ #include "duckdb/storage/table_storage_info.hpp"
28
29
 
29
30
  namespace duckdb {
30
31
 
@@ -78,10 +79,10 @@ void ReplaceColumnBindings(Expression &expr, idx_t source, idx_t dest) {
78
79
  expr, [&](unique_ptr<Expression> &child) { ReplaceColumnBindings(*child, source, dest); });
79
80
  }
80
81
 
81
- void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert *insert, UpdateSetInfo &set_info,
82
- TableCatalogEntry &table) {
83
- D_ASSERT(insert->children.size() == 1);
84
- D_ASSERT(insert->children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
82
+ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
83
+ TableCatalogEntry &table, TableStorageInfo &storage_info) {
84
+ D_ASSERT(insert.children.size() == 1);
85
+ D_ASSERT(insert.children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
85
86
 
86
87
  vector<column_t> logical_column_ids;
87
88
  vector<string> column_names;
@@ -97,13 +98,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
97
98
  if (column.Generated()) {
98
99
  throw BinderException("Cant update column \"%s\" because it is a generated column!", column.Name());
99
100
  }
100
- if (std::find(insert->set_columns.begin(), insert->set_columns.end(), column.Physical()) !=
101
- insert->set_columns.end()) {
101
+ if (std::find(insert.set_columns.begin(), insert.set_columns.end(), column.Physical()) !=
102
+ insert.set_columns.end()) {
102
103
  throw BinderException("Multiple assignments to same column \"%s\"", colname);
103
104
  }
104
- insert->set_columns.push_back(column.Physical());
105
+ insert.set_columns.push_back(column.Physical());
105
106
  logical_column_ids.push_back(column.Oid());
106
- insert->set_types.push_back(column.Type());
107
+ insert.set_types.push_back(column.Type());
107
108
  column_names.push_back(colname);
108
109
  if (expr->type == ExpressionType::VALUE_DEFAULT) {
109
110
  expr = ExpandDefaultExpression(column);
@@ -120,14 +121,13 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
120
121
  throw BinderException("Expression in the DO UPDATE SET clause can not be a subquery");
121
122
  }
122
123
 
123
- insert->expressions.push_back(std::move(bound_expr));
124
+ insert.expressions.push_back(std::move(bound_expr));
124
125
  }
125
126
 
126
127
  // Figure out which columns are indexed on
127
128
  unordered_set<column_t> indexed_columns;
128
- auto &indexes = table.GetStorage().info->indexes.Indexes();
129
- for (auto &index : indexes) {
130
- for (auto &column_id : index->column_id_set) {
129
+ for (auto &index : storage_info.index_info) {
130
+ for (auto &column_id : index.column_set) {
131
131
  indexed_columns.insert(column_id);
132
132
  }
133
133
  }
@@ -142,16 +142,16 @@ void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert
142
142
  }
143
143
  }
144
144
 
145
- unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert) {
145
+ unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert,
146
+ TableStorageInfo &storage_info) {
146
147
  auto set_info = make_uniq<UpdateSetInfo>();
147
148
 
148
149
  auto &columns = set_info->columns;
149
150
  // Figure out which columns are indexed on
150
151
 
151
152
  unordered_set<column_t> indexed_columns;
152
- auto &indexes = table.GetStorage().info->indexes.Indexes();
153
- for (auto &index : indexes) {
154
- for (auto &column_id : index->column_id_set) {
153
+ for (auto &index : storage_info.index_info) {
154
+ for (auto &column_id : index.column_set) {
155
155
  indexed_columns.insert(column_id);
156
156
  }
157
157
  }
@@ -190,9 +190,6 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
190
190
  insert.action_type = OnConflictAction::THROW;
191
191
  return;
192
192
  }
193
- if (!table.IsDuckTable()) {
194
- throw BinderException("ON CONFLICT clause is not yet supported for non-DuckDB tables");
195
- }
196
193
  D_ASSERT(stmt.table_ref->type == TableReferenceType::BASE_TABLE);
197
194
 
198
195
  // visit the table reference
@@ -208,6 +205,9 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
208
205
  D_ASSERT(on_conflict.action_type != OnConflictAction::THROW);
209
206
  insert.action_type = on_conflict.action_type;
210
207
 
208
+ // obtain the table storage info
209
+ auto storage_info = table.GetStorageInfo(context);
210
+
211
211
  auto &columns = table.GetColumns();
212
212
  if (!on_conflict.indexed_columns.empty()) {
213
213
  // Bind the ON CONFLICT (<columns>)
@@ -232,18 +232,17 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
232
232
  insert.on_conflict_filter.insert(col.Oid());
233
233
  }
234
234
  }
235
- auto &indexes = table.GetStorage().info->indexes;
236
235
  bool index_references_columns = false;
237
- indexes.Scan([&](Index &index) {
238
- if (!index.IsUnique()) {
239
- return false;
236
+ for (auto &index : storage_info.index_info) {
237
+ if (!index.is_unique) {
238
+ continue;
240
239
  }
241
- bool index_matches = insert.on_conflict_filter == index.column_id_set;
240
+ bool index_matches = insert.on_conflict_filter == index.column_set;
242
241
  if (index_matches) {
243
242
  index_references_columns = true;
243
+ break;
244
244
  }
245
- return index_matches;
246
- });
245
+ }
247
246
  if (!index_references_columns) {
248
247
  // Same as before, this is essentially a no-op, turning this into a DO THROW instead
249
248
  // But since this makes no logical sense, it's probably better to throw an error
@@ -254,21 +253,19 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
254
253
  // When omitting the conflict target, the ON CONFLICT applies to every UNIQUE/PRIMARY KEY on the table
255
254
 
256
255
  // We check if there are any constraints on the table, if there aren't we throw an error.
257
- auto &indexes = table.GetStorage().info->indexes;
258
256
  idx_t found_matching_indexes = 0;
259
- indexes.Scan([&](Index &index) {
260
- if (!index.IsUnique()) {
261
- return false;
257
+ for (auto &index : storage_info.index_info) {
258
+ if (!index.is_unique) {
259
+ continue;
262
260
  }
263
261
  // does this work with multi-column indexes?
264
- auto &indexed_columns = index.column_id_set;
262
+ auto &indexed_columns = index.column_set;
265
263
  for (auto &column : table.GetColumns().Physical()) {
266
264
  if (indexed_columns.count(column.Physical().index)) {
267
265
  found_matching_indexes++;
268
266
  }
269
267
  }
270
- return false;
271
- });
268
+ }
272
269
  if (!found_matching_indexes) {
273
270
  throw BinderException(
274
271
  "There are no UNIQUE/PRIMARY KEY Indexes that refer to this table, ON CONFLICT is a no-op");
@@ -338,7 +335,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
338
335
 
339
336
  if (insert.action_type == OnConflictAction::REPLACE) {
340
337
  D_ASSERT(on_conflict.set_info == nullptr);
341
- on_conflict.set_info = CreateSetInfoForReplace(table, stmt);
338
+ on_conflict.set_info = CreateSetInfoForReplace(table, stmt, storage_info);
342
339
  insert.action_type = OnConflictAction::UPDATE;
343
340
  }
344
341
  if (on_conflict.set_info && on_conflict.set_info->columns.empty()) {
@@ -374,7 +371,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl
374
371
  insert.do_update_condition = std::move(condition);
375
372
  }
376
373
 
377
- BindDoUpdateSetExpressions(table_alias, &insert, set_info, table);
374
+ BindDoUpdateSetExpressions(table_alias, insert, set_info, table, storage_info);
378
375
 
379
376
  // Get the column_ids we need to fetch later on from the conflicting tuples
380
377
  // of the original table, to execute the expressions
@@ -526,43 +526,59 @@ void RowGroupCollection::Update(TransactionData transaction, row_t *ids, const v
526
526
  void RowGroupCollection::RemoveFromIndexes(TableIndexList &indexes, Vector &row_identifiers, idx_t count) {
527
527
  auto row_ids = FlatVector::GetData<row_t>(row_identifiers);
528
528
 
529
- // figure out which row_group to fetch from
530
- auto row_group = row_groups->GetSegment(row_ids[0]);
531
- auto row_group_vector_idx = (row_ids[0] - row_group->start) / STANDARD_VECTOR_SIZE;
532
- auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
533
-
534
- // create a selection vector from the row_ids
535
- SelectionVector sel(STANDARD_VECTOR_SIZE);
536
- for (idx_t i = 0; i < count; i++) {
537
- auto row_in_vector = row_ids[i] - base_row_id;
538
- D_ASSERT(row_in_vector < STANDARD_VECTOR_SIZE);
539
- sel.set_index(i, row_in_vector);
540
- }
541
-
542
- // now fetch the columns from that row_group
543
- TableScanState state;
544
- state.table_state.max_row = row_start + total_rows;
545
-
529
+ // initialize the fetch state
546
530
  // FIXME: we do not need to fetch all columns, only the columns required by the indices!
531
+ TableScanState state;
547
532
  vector<column_t> column_ids;
548
533
  column_ids.reserve(types.size());
549
534
  for (idx_t i = 0; i < types.size(); i++) {
550
535
  column_ids.push_back(i);
551
536
  }
552
537
  state.Initialize(std::move(column_ids));
538
+ state.table_state.max_row = row_start + total_rows;
553
539
 
540
+ // initialize the fetch chunk
554
541
  DataChunk result;
555
542
  result.Initialize(GetAllocator(), types);
556
543
 
557
- state.table_state.Initialize(GetTypes());
558
- row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
559
- row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
560
- result.Slice(sel, count);
544
+ SelectionVector sel(STANDARD_VECTOR_SIZE);
545
+ // now iterate over the row ids
546
+ for (idx_t r = 0; r < count;) {
547
+ result.Reset();
548
+ // figure out which row_group to fetch from
549
+ auto row_id = row_ids[r];
550
+ auto row_group = row_groups->GetSegment(row_id);
551
+ auto row_group_vector_idx = (row_id - row_group->start) / STANDARD_VECTOR_SIZE;
552
+ auto base_row_id = row_group_vector_idx * STANDARD_VECTOR_SIZE + row_group->start;
553
+
554
+ // fetch the current vector
555
+ state.table_state.Initialize(GetTypes());
556
+ row_group->InitializeScanWithOffset(state.table_state, row_group_vector_idx);
557
+ row_group->ScanCommitted(state.table_state, result, TableScanType::TABLE_SCAN_COMMITTED_ROWS);
558
+ result.Verify();
559
+
560
+ // check for any remaining row ids if they also fall into this vector
561
+ // we try to handle as many rows as possible at the same time
562
+ idx_t sel_count = 0;
563
+ for (; r < count; r++) {
564
+ idx_t current_row = idx_t(row_ids[r]);
565
+ if (current_row < base_row_id || current_row >= base_row_id + result.size()) {
566
+ // this row-id does not fall into the current chunk - break
567
+ break;
568
+ }
569
+ auto row_in_vector = current_row - base_row_id;
570
+ D_ASSERT(row_in_vector < result.size());
571
+ sel.set_index(sel_count++, row_in_vector);
572
+ }
573
+ D_ASSERT(sel_count > 0);
574
+ // slice the vector with all rows that are present in this vector and erase from the index
575
+ result.Slice(sel, sel_count);
561
576
 
562
- indexes.Scan([&](Index &index) {
563
- index.Delete(result, row_identifiers);
564
- return false;
565
- });
577
+ indexes.Scan([&](Index &index) {
578
+ index.Delete(result, row_identifiers);
579
+ return false;
580
+ });
581
+ }
566
582
  }
567
583
 
568
584
  void RowGroupCollection::UpdateColumn(TransactionData transaction, Vector &row_ids, const vector<column_t> &column_path,