duckdb 0.5.1-dev136.0 → 0.5.1-dev149.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.1-dev136.0",
4
+ "version": "0.5.1-dev149.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -3763,6 +3763,19 @@ idx_t TableCatalogEntry::StandardColumnCount() const {
3763
3763
  return count;
3764
3764
  }
3765
3765
 
3766
+ unique_ptr<BaseStatistics> TableCatalogEntry::GetStatistics(ClientContext &context, column_t column_id) {
3767
+ if (column_id == COLUMN_IDENTIFIER_ROW_ID) {
3768
+ return nullptr;
3769
+ }
3770
+ if (column_id >= columns.size()) {
3771
+ throw InternalException("TableCatalogEntry::GetStatistics column_id out of range");
3772
+ }
3773
+ if (columns[column_id].Generated()) {
3774
+ return nullptr;
3775
+ }
3776
+ return storage->GetStatistics(context, columns[column_id].StorageOid());
3777
+ }
3778
+
3766
3779
  unique_ptr<CatalogEntry> TableCatalogEntry::AlterEntry(ClientContext &context, AlterInfo *info) {
3767
3780
  D_ASSERT(!internal);
3768
3781
  if (info->type != AlterType::ALTER_TABLE) {
@@ -4052,7 +4065,9 @@ unique_ptr<CatalogEntry> TableCatalogEntry::SetDefault(ClientContext &context, S
4052
4065
  auto copy = columns[i].Copy();
4053
4066
  if (default_idx == i) {
4054
4067
  // set the default value of this column
4055
- D_ASSERT(!copy.Generated()); // Shouldnt reach here - DEFAULT value isn't supported for Generated Columns
4068
+ if (copy.Generated()) {
4069
+ throw BinderException("Cannot SET DEFAULT for generated column \"%s\"", columns[i].Name());
4070
+ }
4056
4071
  copy.SetDefaultValue(info.expression ? info.expression->Copy() : nullptr);
4057
4072
  }
4058
4073
  create_info->columns.push_back(move(copy));
@@ -19202,6 +19217,8 @@ private:
19202
19217
  //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location
19203
19218
  void SetFilePointer(FileHandle &handle, idx_t location);
19204
19219
  idx_t GetFilePointer(FileHandle &handle);
19220
+
19221
+ vector<string> FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path);
19205
19222
  };
19206
19223
 
19207
19224
  } // namespace duckdb
@@ -20083,6 +20100,26 @@ static void GlobFiles(FileSystem &fs, const string &path, const string &glob, bo
20083
20100
  });
20084
20101
  }
20085
20102
 
20103
+ vector<string> LocalFileSystem::FetchFileWithoutGlob(const string &path, FileOpener *opener, bool absolute_path) {
20104
+ vector<string> result;
20105
+ if (FileExists(path) || IsPipe(path)) {
20106
+ result.push_back(path);
20107
+ } else if (!absolute_path) {
20108
+ Value value;
20109
+ if (opener->TryGetCurrentSetting("file_search_path", value)) {
20110
+ auto search_paths_str = value.ToString();
20111
+ std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
20112
+ for (const auto &search_path : search_paths) {
20113
+ auto joined_path = JoinPath(search_path, path);
20114
+ if (FileExists(joined_path) || IsPipe(joined_path)) {
20115
+ result.push_back(joined_path);
20116
+ }
20117
+ }
20118
+ }
20119
+ }
20120
+ return result;
20121
+ }
20122
+
20086
20123
  vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
20087
20124
  if (path.empty()) {
20088
20125
  return vector<string>();
@@ -20129,23 +20166,7 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
20129
20166
  // Check if the path has a glob at all
20130
20167
  if (!HasGlob(path)) {
20131
20168
  // no glob: return only the file (if it exists or is a pipe)
20132
- vector<string> result;
20133
- if (FileExists(path) || IsPipe(path)) {
20134
- result.push_back(path);
20135
- } else if (!absolute_path) {
20136
- Value value;
20137
- if (opener->TryGetCurrentSetting("file_search_path", value)) {
20138
- auto search_paths_str = value.ToString();
20139
- std::vector<std::string> search_paths = StringUtil::Split(search_paths_str, ',');
20140
- for (const auto &search_path : search_paths) {
20141
- auto joined_path = JoinPath(search_path, path);
20142
- if (FileExists(joined_path) || IsPipe(joined_path)) {
20143
- result.push_back(joined_path);
20144
- }
20145
- }
20146
- }
20147
- }
20148
- return result;
20169
+ return FetchFileWithoutGlob(path, opener, absolute_path);
20149
20170
  }
20150
20171
  vector<string> previous_directories;
20151
20172
  if (absolute_path) {
@@ -20179,7 +20200,12 @@ vector<string> LocalFileSystem::Glob(const string &path, FileOpener *opener) {
20179
20200
  }
20180
20201
  }
20181
20202
  }
20182
- if (is_last_chunk || result.empty()) {
20203
+ if (result.empty()) {
20204
+ // no result found that matches the glob
20205
+ // last ditch effort: search the path as a string literal
20206
+ return FetchFileWithoutGlob(path, opener, absolute_path);
20207
+ }
20208
+ if (is_last_chunk) {
20183
20209
  return result;
20184
20210
  }
20185
20211
  previous_directories = move(result);
@@ -22528,14 +22554,16 @@ struct IntervalToStringCast {
22528
22554
  if (micros < 0) {
22529
22555
  // negative time: append negative sign
22530
22556
  buffer[length++] = '-';
22557
+ } else {
22531
22558
  micros = -micros;
22532
22559
  }
22533
- int64_t hour = micros / Interval::MICROS_PER_HOUR;
22534
- micros -= hour * Interval::MICROS_PER_HOUR;
22535
- int64_t min = micros / Interval::MICROS_PER_MINUTE;
22536
- micros -= min * Interval::MICROS_PER_MINUTE;
22537
- int64_t sec = micros / Interval::MICROS_PER_SEC;
22538
- micros -= sec * Interval::MICROS_PER_SEC;
22560
+ int64_t hour = -(micros / Interval::MICROS_PER_HOUR);
22561
+ micros += hour * Interval::MICROS_PER_HOUR;
22562
+ int64_t min = -(micros / Interval::MICROS_PER_MINUTE);
22563
+ micros += min * Interval::MICROS_PER_MINUTE;
22564
+ int64_t sec = -(micros / Interval::MICROS_PER_SEC);
22565
+ micros += sec * Interval::MICROS_PER_SEC;
22566
+ micros = -micros;
22539
22567
 
22540
22568
  if (hour < 10) {
22541
22569
  buffer[length++] = '0';
@@ -80790,6 +80818,7 @@ void PhysicalRecursiveCTE::ExecuteRecursivePipelines(ExecutionContext &context)
80790
80818
  void PhysicalRecursiveCTE::BuildPipelines(Executor &executor, Pipeline &current, PipelineBuildState &state) {
80791
80819
  op_state.reset();
80792
80820
  sink_state.reset();
80821
+ pipelines.clear();
80793
80822
 
80794
80823
  // recursive CTE
80795
80824
  state.SetPipelineSource(current, this);
@@ -121643,8 +121672,7 @@ static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, co
121643
121672
  // we don't emit any statistics for tables that have outstanding transaction-local data
121644
121673
  return nullptr;
121645
121674
  }
121646
- auto storage_idx = GetStorageIndex(*bind_data.table, column_id);
121647
- return bind_data.table->storage->GetStatistics(context, storage_idx);
121675
+ return bind_data.table->GetStatistics(context, column_id);
121648
121676
  }
121649
121677
 
121650
121678
  static void TableScanFunc(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
@@ -142546,9 +142574,7 @@ void CardinalityEstimator::UpdateTotalDomains(JoinNode *node, LogicalOperator *o
142546
142574
  // Get HLL stats here
142547
142575
  auto actual_binding = relation_column_to_original_column[key];
142548
142576
 
142549
- // sometimes base stats is null (test_709.test) returns null for base stats while
142550
- // there is still a catalog table. Anybody know anything about this?
142551
- auto base_stats = catalog_table->storage->GetStatistics(context, actual_binding.column_index);
142577
+ auto base_stats = catalog_table->GetStatistics(context, actual_binding.column_index);
142552
142578
  if (base_stats) {
142553
142579
  count = base_stats->GetDistinctCount();
142554
142580
  }
@@ -147908,21 +147934,35 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownAggregate(unique_ptr<Logical
147908
147934
  FilterPushdown child_pushdown(optimizer);
147909
147935
  for (idx_t i = 0; i < filters.size(); i++) {
147910
147936
  auto &f = *filters[i];
147911
- // check if any aggregate or GROUPING functions are in the set
147912
- if (f.bindings.find(aggr.aggregate_index) == f.bindings.end() &&
147913
- f.bindings.find(aggr.groupings_index) == f.bindings.end()) {
147914
- // no aggregate! we can push this down
147915
- // rewrite any group bindings within the filter
147916
- f.filter = ReplaceGroupBindings(aggr, move(f.filter));
147917
- // add the filter to the child node
147918
- if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
147919
- // filter statically evaluates to false, strip tree
147920
- return make_unique<LogicalEmptyResult>(move(op));
147937
+ if (f.bindings.find(aggr.aggregate_index) != f.bindings.end()) {
147938
+ // filter on aggregate: cannot pushdown
147939
+ continue;
147940
+ }
147941
+ if (f.bindings.find(aggr.groupings_index) != f.bindings.end()) {
147942
+ // filter on GROUPINGS function: cannot pushdown
147943
+ continue;
147944
+ }
147945
+ // if there are any empty grouping sets, we cannot push down filters
147946
+ bool has_empty_grouping_sets = false;
147947
+ for (auto &grp : aggr.grouping_sets) {
147948
+ if (grp.empty()) {
147949
+ has_empty_grouping_sets = true;
147921
147950
  }
147922
- // erase the filter from here
147923
- filters.erase(filters.begin() + i);
147924
- i--;
147925
147951
  }
147952
+ if (has_empty_grouping_sets) {
147953
+ continue;
147954
+ }
147955
+ // no aggregate! we can push this down
147956
+ // rewrite any group bindings within the filter
147957
+ f.filter = ReplaceGroupBindings(aggr, move(f.filter));
147958
+ // add the filter to the child node
147959
+ if (child_pushdown.AddFilter(move(f.filter)) == FilterResult::UNSATISFIABLE) {
147960
+ // filter statically evaluates to false, strip tree
147961
+ return make_unique<LogicalEmptyResult>(move(op));
147962
+ }
147963
+ // erase the filter from here
147964
+ filters.erase(filters.begin() + i);
147965
+ i--;
147926
147966
  }
147927
147967
  child_pushdown.GenerateFilters();
147928
147968
 
@@ -177180,6 +177220,7 @@ protected:
177180
177220
  BindResult BindExpression(unique_ptr<ParsedExpression> *expr, idx_t depth, bool root_expression = false) override;
177181
177221
 
177182
177222
  string UnsupportedAggregateMessage() override;
177223
+ bool CanContainSubqueries() override;
177183
177224
  };
177184
177225
 
177185
177226
  } // namespace duckdb
@@ -185042,6 +185083,10 @@ string ConstantBinder::UnsupportedAggregateMessage() {
185042
185083
  return clause + " cannot contain aggregates!";
185043
185084
  }
185044
185085
 
185086
+ bool ConstantBinder::CanContainSubqueries() {
185087
+ return false;
185088
+ }
185089
+
185045
185090
  } // namespace duckdb
185046
185091
 
185047
185092
 
@@ -185843,6 +185888,9 @@ unique_ptr<Expression> ExpressionBinder::Bind(unique_ptr<ParsedExpression> &expr
185843
185888
  // bind the main expression
185844
185889
  auto error_msg = Bind(&expr, 0, root_expression);
185845
185890
  if (!error_msg.empty()) {
185891
+ if (!CanContainSubqueries()) {
185892
+ throw BinderException(error_msg);
185893
+ }
185846
185894
  // failed to bind: try to bind correlated columns in the expression (if any)
185847
185895
  bool success = BindCorrelatedColumns(expr);
185848
185896
  if (!success) {
@@ -185900,6 +185948,10 @@ string ExpressionBinder::Bind(unique_ptr<ParsedExpression> *expr, idx_t depth, b
185900
185948
  return string();
185901
185949
  }
185902
185950
 
185951
+ bool ExpressionBinder::CanContainSubqueries() {
185952
+ return true;
185953
+ }
185954
+
185903
185955
  } // namespace duckdb
185904
185956
 
185905
185957
 
@@ -198318,6 +198370,9 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
198318
198370
  return nullptr;
198319
198371
  }
198320
198372
  lock_guard<mutex> stats_guard(stats_lock);
198373
+ if (column_id >= column_stats.size()) {
198374
+ throw InternalException("Call to GetStatistics is out of range");
198375
+ }
198321
198376
  return column_stats[column_id]->stats->Copy();
198322
198377
  }
198323
198378
 
@@ -201132,6 +201187,7 @@ idx_t ChunkVectorInfo::Delete(Transaction &transaction, row_t rows[], idx_t coun
201132
201187
  }
201133
201188
  // after verifying that there are no conflicts we mark the tuple as deleted
201134
201189
  deleted[rows[i]] = transaction.transaction_id;
201190
+ rows[deleted_tuples] = rows[i];
201135
201191
  deleted_tuples++;
201136
201192
  }
201137
201193
  return deleted_tuples;
@@ -204038,9 +204094,15 @@ void VersionDeleteState::Flush() {
204038
204094
  return;
204039
204095
  }
204040
204096
  // delete in the current info
204041
- delete_count += current_info->Delete(transaction, rows, count);
204042
- // now push the delete into the undo buffer
204043
- transaction.PushDelete(table, current_info, rows, count, base_row + chunk_row);
204097
+ // it is possible for delete statements to delete the same tuple multiple times when combined with a USING clause
204098
+ // in the current_info->Delete, we check which tuples are actually deleted (excluding duplicate deletions)
204099
+ // this is returned in the actual_delete_count
204100
+ auto actual_delete_count = current_info->Delete(transaction, rows, count);
204101
+ delete_count += actual_delete_count;
204102
+ if (actual_delete_count > 0) {
204103
+ // now push the delete into the undo buffer, but only if any deletes were actually performed
204104
+ transaction.PushDelete(table, current_info, rows, actual_delete_count, base_row + chunk_row);
204105
+ }
204044
204106
  count = 0;
204045
204107
  }
204046
204108
 
@@ -206876,6 +206938,7 @@ void CleanupState::CleanupUpdate(UpdateInfo *info) {
206876
206938
 
206877
206939
  void CleanupState::CleanupDelete(DeleteInfo *info) {
206878
206940
  auto version_table = info->table;
206941
+ D_ASSERT(version_table->info->cardinality >= info->count);
206879
206942
  version_table->info->cardinality -= info->count;
206880
206943
  if (version_table->info->indexes.Empty()) {
206881
206944
  // this table has no indexes: no cleanup to be done
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "6823f459f"
15
- #define DUCKDB_VERSION "v0.5.1-dev136"
14
+ #define DUCKDB_SOURCE_ID "bb671e06a"
15
+ #define DUCKDB_VERSION "v0.5.1-dev149"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -9935,6 +9935,9 @@ public:
9935
9935
  vector<LogicalType> GetTypes();
9936
9936
  string ToSQL() override;
9937
9937
 
9938
+ //! Get statistics of a column (physical or virtual) within the table
9939
+ unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id);
9940
+
9938
9941
  //! Serialize the meta information of the TableCatalogEntry a serializer
9939
9942
  virtual void Serialize(Serializer &serializer);
9940
9943
  //! Deserializes to a CreateTableInfo
@@ -15987,6 +15990,7 @@ protected:
15987
15990
 
15988
15991
  virtual string UnsupportedAggregateMessage();
15989
15992
  virtual string UnsupportedUnnestMessage();
15993
+ virtual bool CanContainSubqueries();
15990
15994
 
15991
15995
  Binder &binder;
15992
15996
  ClientContext &context;
@@ -22751,6 +22755,11 @@ public:
22751
22755
  void CommitAppend(transaction_t commit_id, idx_t start, idx_t end) override;
22752
22756
 
22753
22757
  void Append(idx_t start, idx_t end, transaction_t commit_id);
22758
+ //! Performs a delete in the ChunkVectorInfo - returns how many tuples were actually deleted
22759
+ //! The number of rows that were actually deleted might be lower than the input count
22760
+ //! In case we delete rows that were already deleted
22761
+ //! Note that "rows" is written to to reflect the row ids that were actually deleted
22762
+ //! i.e. after calling this function, rows will hold [0..actual_delete_count] row ids of the actually deleted tuples
22754
22763
  idx_t Delete(Transaction &transaction, row_t rows[], idx_t count);
22755
22764
  void CommitDelete(transaction_t commit_id, row_t rows[], idx_t count);
22756
22765
 
@@ -24382,7 +24391,9 @@ public:
24382
24391
  this->is_root = true;
24383
24392
  }
24384
24393
 
24394
+ //! Get statistics of a physical column within the table
24385
24395
  unique_ptr<BaseStatistics> GetStatistics(ClientContext &context, column_t column_id);
24396
+ //! Sets statistics of a physical column within the table
24386
24397
  void SetStatistics(column_t column_id, const std::function<void(BaseStatistics &)> &set_fun);
24387
24398
 
24388
24399
  //! Checkpoint the table to the specified table data writer