duckdb 0.3.5-dev966.0 → 0.3.5-dev971.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.3.5-dev966.0",
4
+ "version": "0.3.5-dev971.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -297,7 +297,6 @@ public:
297
297
 
298
298
  namespace duckdb {
299
299
 
300
- class ColumnStatistics;
301
300
  class DataTable;
302
301
  struct CreateViewInfo;
303
302
 
@@ -3594,10 +3593,13 @@ TableCatalogEntry::TableCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schem
3594
3593
  column_dependency_manager(move(info->column_dependency_manager)) {
3595
3594
  this->temporary = info->Base().temporary;
3596
3595
  // add lower case aliases
3596
+ this->name_map = move(info->name_map);
3597
+ #ifdef DEBUG
3598
+ D_ASSERT(name_map.size() == columns.size());
3597
3599
  for (idx_t i = 0; i < columns.size(); i++) {
3598
- D_ASSERT(name_map.find(columns[i].Name()) == name_map.end());
3599
- name_map[columns[i].Name()] = i;
3600
+ D_ASSERT(name_map[columns[i].Name()] == i);
3600
3601
  }
3602
+ #endif
3601
3603
  // add the "rowid" alias, if there is no rowid column specified in the table
3602
3604
  if (name_map.find("rowid") == name_map.end()) {
3603
3605
  name_map["rowid"] = COLUMN_IDENTIFIER_ROW_ID;
@@ -165243,7 +165245,9 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
165243
165245
  if (column.Generated()) {
165244
165246
  continue;
165245
165247
  }
165246
- ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
165248
+ if (column.Type().id() == LogicalTypeId::VARCHAR) {
165249
+ ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
165250
+ }
165247
165251
  BindLogicalType(context, column.TypeMutable());
165248
165252
  if (column.Type().id() == LogicalTypeId::ENUM) {
165249
165253
  // We add a catalog dependency
@@ -179096,7 +179100,10 @@ DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &t
179096
179100
  }
179097
179101
  row_groups->AppendSegment(move(new_row_group));
179098
179102
  }
179099
- column_stats = move(data->column_stats);
179103
+ column_stats.reserve(data->column_stats.size());
179104
+ for (auto &stats : data->column_stats) {
179105
+ column_stats.push_back(make_shared<ColumnStatistics>(move(stats)));
179106
+ }
179100
179107
  if (column_stats.size() != types.size()) { // LCOV_EXCL_START
179101
179108
  throw IOException("Table statistics column count is not aligned with table column count. Corrupt file?");
179102
179109
  } // LCOV_EXCL_STOP
@@ -179106,7 +179113,7 @@ DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &t
179106
179113
 
179107
179114
  AppendRowGroup(0);
179108
179115
  for (auto &type : types) {
179109
- column_stats.push_back(BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS));
179116
+ column_stats.push_back(ColumnStatistics::CreateEmptyStats(type));
179110
179117
  }
179111
179118
  } else {
179112
179119
  D_ASSERT(column_stats.size() == types.size());
@@ -179134,9 +179141,9 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition
179134
179141
 
179135
179142
  // set up the statistics
179136
179143
  for (idx_t i = 0; i < parent.column_stats.size(); i++) {
179137
- column_stats.push_back(parent.column_stats[i]->Copy());
179144
+ column_stats.push_back(parent.column_stats[i]);
179138
179145
  }
179139
- column_stats.push_back(BaseStatistics::CreateEmpty(new_column_type, StatisticsType::GLOBAL_STATS));
179146
+ column_stats.push_back(ColumnStatistics::CreateEmptyStats(new_column_type));
179140
179147
 
179141
179148
  // add the column definitions from this DataTable
179142
179149
  column_definitions.emplace_back(new_column.Copy());
@@ -179159,7 +179166,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition
179159
179166
  while (current_row_group) {
179160
179167
  auto new_row_group = current_row_group->AddColumn(context, new_column, executor, default_value, result);
179161
179168
  // merge in the statistics
179162
- column_stats[new_column_idx]->Merge(*new_row_group->GetStatistics(new_column_idx));
179169
+ column_stats[new_column_idx]->stats->Merge(*new_row_group->GetStatistics(new_column_idx));
179163
179170
 
179164
179171
  row_groups->AppendSegment(move(new_row_group));
179165
179172
  current_row_group = (RowGroup *)current_row_group->next.get();
@@ -179195,7 +179202,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
179195
179202
  // erase the stats from this DataTable
179196
179203
  for (idx_t i = 0; i < parent.column_stats.size(); i++) {
179197
179204
  if (i != removed_column) {
179198
- column_stats.push_back(parent.column_stats[i]->Copy());
179205
+ column_stats.push_back(parent.column_stats[i]);
179199
179206
  }
179200
179207
  }
179201
179208
 
@@ -179251,10 +179258,9 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_id
179251
179258
  // the column that had its type changed will have the new statistics computed during conversion
179252
179259
  for (idx_t i = 0; i < column_definitions.size(); i++) {
179253
179260
  if (i == changed_idx) {
179254
- column_stats.push_back(
179255
- BaseStatistics::CreateEmpty(column_definitions[i].Type(), StatisticsType::GLOBAL_STATS));
179261
+ column_stats.push_back(ColumnStatistics::CreateEmptyStats(column_definitions[i].Type()));
179256
179262
  } else {
179257
- column_stats.push_back(parent.column_stats[i]->Copy());
179263
+ column_stats.push_back(parent.column_stats[i]);
179258
179264
  }
179259
179265
  }
179260
179266
 
@@ -179285,7 +179291,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_id
179285
179291
  while (current_row_group) {
179286
179292
  auto new_row_group =
179287
179293
  current_row_group->AlterType(context, target_type, changed_idx, executor, scan_state, scan_chunk);
179288
- column_stats[changed_idx]->Merge(*new_row_group->GetStatistics(changed_idx));
179294
+ column_stats[changed_idx]->stats->Merge(*new_row_group->GetStatistics(changed_idx));
179289
179295
  row_groups->AppendSegment(move(new_row_group));
179290
179296
  current_row_group = (RowGroup *)current_row_group->next.get();
179291
179297
  }
@@ -179778,7 +179784,7 @@ void DataTable::Append(Transaction &transaction, DataChunk &chunk, TableAppendSt
179778
179784
  // merge the stats
179779
179785
  lock_guard<mutex> stats_guard(stats_lock);
179780
179786
  for (idx_t i = 0; i < column_definitions.size(); i++) {
179781
- column_stats[i]->Merge(*current_row_group->GetStatistics(i));
179787
+ column_stats[i]->stats->Merge(*current_row_group->GetStatistics(i));
179782
179788
  }
179783
179789
  }
179784
179790
  state.remaining_append_count -= append_count;
@@ -179812,7 +179818,7 @@ void DataTable::Append(Transaction &transaction, DataChunk &chunk, TableAppendSt
179812
179818
  if (type == PhysicalType::LIST || type == PhysicalType::STRUCT) {
179813
179819
  continue;
179814
179820
  }
179815
- column_stats[col_idx]->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
179821
+ column_stats[col_idx]->stats->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
179816
179822
  }
179817
179823
  }
179818
179824
 
@@ -180257,7 +180263,7 @@ void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector
180257
180263
  lock_guard<mutex> stats_guard(stats_lock);
180258
180264
  for (idx_t i = 0; i < column_ids.size(); i++) {
180259
180265
  auto column_id = column_ids[i];
180260
- column_stats[column_id]->Merge(*row_group->GetStatistics(column_id));
180266
+ column_stats[column_id]->stats->Merge(*row_group->GetStatistics(column_id));
180261
180267
  }
180262
180268
  } while (pos < count);
180263
180269
  }
@@ -180290,7 +180296,7 @@ void DataTable::UpdateColumn(TableCatalogEntry &table, ClientContext &context, V
180290
180296
  row_group->UpdateColumn(transaction, updates, row_ids, column_path);
180291
180297
 
180292
180298
  lock_guard<mutex> stats_guard(stats_lock);
180293
- column_stats[primary_column_idx]->Merge(*row_group->GetStatistics(primary_column_idx));
180299
+ column_stats[primary_column_idx]->stats->Merge(*row_group->GetStatistics(primary_column_idx));
180294
180300
  }
180295
180301
 
180296
180302
  //===--------------------------------------------------------------------===//
@@ -180375,7 +180381,7 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
180375
180381
  return nullptr;
180376
180382
  }
180377
180383
  lock_guard<mutex> stats_guard(stats_lock);
180378
- return column_stats[column_id]->Copy();
180384
+ return column_stats[column_id]->stats->Copy();
180379
180385
  }
180380
180386
 
180381
180387
  //===--------------------------------------------------------------------===//
@@ -180386,7 +180392,7 @@ BlockPointer DataTable::Checkpoint(TableDataWriter &writer) {
180386
180392
  // FIXME: we might want to combine adjacent row groups in case they have had deletions...
180387
180393
  vector<unique_ptr<BaseStatistics>> global_stats;
180388
180394
  for (idx_t i = 0; i < column_definitions.size(); i++) {
180389
- global_stats.push_back(column_stats[i]->Copy());
180395
+ global_stats.push_back(column_stats[i]->stats->Copy());
180390
180396
  }
180391
180397
 
180392
180398
  auto row_group = (RowGroup *)row_groups->GetRootSegment();
@@ -181852,6 +181858,19 @@ void BaseStatistics::Verify(Vector &vector, idx_t count) const {
181852
181858
  } // namespace duckdb
181853
181859
 
181854
181860
 
181861
+ namespace duckdb {
181862
+
181863
+ ColumnStatistics::ColumnStatistics(unique_ptr<BaseStatistics> stats_p) : stats(move(stats_p)) {
181864
+ }
181865
+
181866
+ shared_ptr<ColumnStatistics> ColumnStatistics::CreateEmptyStats(const LogicalType &type) {
181867
+ auto col_stats = BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS);
181868
+ return make_shared<ColumnStatistics>(move(col_stats));
181869
+ }
181870
+
181871
+ } // namespace duckdb
181872
+
181873
+
181855
181874
 
181856
181875
 
181857
181876
 
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "466fb1f9d"
15
- #define DUCKDB_VERSION "v0.3.5-dev966"
14
+ #define DUCKDB_SOURCE_ID "dbb043b11"
15
+ #define DUCKDB_VERSION "v0.3.5-dev971"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -11653,7 +11653,6 @@ private:
11653
11653
 
11654
11654
  namespace duckdb {
11655
11655
 
11656
- class ColumnStatistics;
11657
11656
  class DataTable;
11658
11657
  struct CreateTableInfo;
11659
11658
  struct BoundCreateTableInfo;
@@ -21370,6 +21369,32 @@ private:
21370
21369
 
21371
21370
 
21372
21371
 
21372
+ //===----------------------------------------------------------------------===//
21373
+ // DuckDB
21374
+ //
21375
+ // duckdb/storage/statistics/column_statistics.hpp
21376
+ //
21377
+ //
21378
+ //===----------------------------------------------------------------------===//
21379
+
21380
+
21381
+
21382
+
21383
+
21384
+ namespace duckdb {
21385
+
21386
+ class ColumnStatistics {
21387
+ public:
21388
+ explicit ColumnStatistics(unique_ptr<BaseStatistics> stats_p);
21389
+
21390
+ unique_ptr<BaseStatistics> stats;
21391
+
21392
+ public:
21393
+ static shared_ptr<ColumnStatistics> CreateEmptyStats(const LogicalType &type);
21394
+ };
21395
+
21396
+ } // namespace duckdb
21397
+
21373
21398
 
21374
21399
 
21375
21400
 
@@ -21604,7 +21629,7 @@ private:
21604
21629
  //! The segment trees holding the various row_groups of the table
21605
21630
  shared_ptr<SegmentTree> row_groups;
21606
21631
  //! Column statistics
21607
- vector<unique_ptr<BaseStatistics>> column_stats;
21632
+ vector<shared_ptr<ColumnStatistics>> column_stats;
21608
21633
  //! The statistics lock
21609
21634
  mutex stats_lock;
21610
21635
  //! Whether or not the data table is the root DataTable for this table; the root DataTable is the newest version