duckdb 0.3.5-dev966.0 → 0.3.5-dev971.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +39 -20
- package/src/duckdb.hpp +29 -4
- package/src/parquet-amalgamation.cpp +32294 -32294
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -297,7 +297,6 @@ public:
|
|
|
297
297
|
|
|
298
298
|
namespace duckdb {
|
|
299
299
|
|
|
300
|
-
class ColumnStatistics;
|
|
301
300
|
class DataTable;
|
|
302
301
|
struct CreateViewInfo;
|
|
303
302
|
|
|
@@ -3594,10 +3593,13 @@ TableCatalogEntry::TableCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schem
|
|
|
3594
3593
|
column_dependency_manager(move(info->column_dependency_manager)) {
|
|
3595
3594
|
this->temporary = info->Base().temporary;
|
|
3596
3595
|
// add lower case aliases
|
|
3596
|
+
this->name_map = move(info->name_map);
|
|
3597
|
+
#ifdef DEBUG
|
|
3598
|
+
D_ASSERT(name_map.size() == columns.size());
|
|
3597
3599
|
for (idx_t i = 0; i < columns.size(); i++) {
|
|
3598
|
-
D_ASSERT(name_map.find(columns[i].Name()) == name_map.end());
|
|
3599
|
-
name_map[columns[i].Name()] = i;
|
|
3600
|
+
D_ASSERT(name_map[columns[i].Name()] == i);
|
|
3600
3601
|
}
|
|
3602
|
+
#endif
|
|
3601
3603
|
// add the "rowid" alias, if there is no rowid column specified in the table
|
|
3602
3604
|
if (name_map.find("rowid") == name_map.end()) {
|
|
3603
3605
|
name_map["rowid"] = COLUMN_IDENTIFIER_ROW_ID;
|
|
@@ -165243,7 +165245,9 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
|
|
|
165243
165245
|
if (column.Generated()) {
|
|
165244
165246
|
continue;
|
|
165245
165247
|
}
|
|
165246
|
-
|
|
165248
|
+
if (column.Type().id() == LogicalTypeId::VARCHAR) {
|
|
165249
|
+
ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
|
|
165250
|
+
}
|
|
165247
165251
|
BindLogicalType(context, column.TypeMutable());
|
|
165248
165252
|
if (column.Type().id() == LogicalTypeId::ENUM) {
|
|
165249
165253
|
// We add a catalog dependency
|
|
@@ -179096,7 +179100,10 @@ DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &t
|
|
|
179096
179100
|
}
|
|
179097
179101
|
row_groups->AppendSegment(move(new_row_group));
|
|
179098
179102
|
}
|
|
179099
|
-
column_stats = move(data->column_stats);
|
|
179103
|
+
column_stats.reserve(data->column_stats.size());
|
|
179104
|
+
for (auto &stats : data->column_stats) {
|
|
179105
|
+
column_stats.push_back(make_shared<ColumnStatistics>(move(stats)));
|
|
179106
|
+
}
|
|
179100
179107
|
if (column_stats.size() != types.size()) { // LCOV_EXCL_START
|
|
179101
179108
|
throw IOException("Table statistics column count is not aligned with table column count. Corrupt file?");
|
|
179102
179109
|
} // LCOV_EXCL_STOP
|
|
@@ -179106,7 +179113,7 @@ DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &t
|
|
|
179106
179113
|
|
|
179107
179114
|
AppendRowGroup(0);
|
|
179108
179115
|
for (auto &type : types) {
|
|
179109
|
-
column_stats.push_back(BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS));
|
|
179116
|
+
column_stats.push_back(ColumnStatistics::CreateEmptyStats(type));
|
|
179110
179117
|
}
|
|
179111
179118
|
} else {
|
|
179112
179119
|
D_ASSERT(column_stats.size() == types.size());
|
|
@@ -179134,9 +179141,9 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition
|
|
|
179134
179141
|
|
|
179135
179142
|
// set up the statistics
|
|
179136
179143
|
for (idx_t i = 0; i < parent.column_stats.size(); i++) {
|
|
179137
|
-
column_stats.push_back(parent.column_stats[i]->Copy());
|
|
179144
|
+
column_stats.push_back(parent.column_stats[i]);
|
|
179138
179145
|
}
|
|
179139
|
-
column_stats.push_back(BaseStatistics::CreateEmpty(new_column_type, StatisticsType::GLOBAL_STATS));
|
|
179146
|
+
column_stats.push_back(ColumnStatistics::CreateEmptyStats(new_column_type));
|
|
179140
179147
|
|
|
179141
179148
|
// add the column definitions from this DataTable
|
|
179142
179149
|
column_definitions.emplace_back(new_column.Copy());
|
|
@@ -179159,7 +179166,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition
|
|
|
179159
179166
|
while (current_row_group) {
|
|
179160
179167
|
auto new_row_group = current_row_group->AddColumn(context, new_column, executor, default_value, result);
|
|
179161
179168
|
// merge in the statistics
|
|
179162
|
-
column_stats[new_column_idx]->Merge(*new_row_group->GetStatistics(new_column_idx));
|
|
179169
|
+
column_stats[new_column_idx]->stats->Merge(*new_row_group->GetStatistics(new_column_idx));
|
|
179163
179170
|
|
|
179164
179171
|
row_groups->AppendSegment(move(new_row_group));
|
|
179165
179172
|
current_row_group = (RowGroup *)current_row_group->next.get();
|
|
@@ -179195,7 +179202,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
|
|
|
179195
179202
|
// erase the stats from this DataTable
|
|
179196
179203
|
for (idx_t i = 0; i < parent.column_stats.size(); i++) {
|
|
179197
179204
|
if (i != removed_column) {
|
|
179198
|
-
column_stats.push_back(parent.column_stats[i]->Copy());
|
|
179205
|
+
column_stats.push_back(parent.column_stats[i]);
|
|
179199
179206
|
}
|
|
179200
179207
|
}
|
|
179201
179208
|
|
|
@@ -179251,10 +179258,9 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_id
|
|
|
179251
179258
|
// the column that had its type changed will have the new statistics computed during conversion
|
|
179252
179259
|
for (idx_t i = 0; i < column_definitions.size(); i++) {
|
|
179253
179260
|
if (i == changed_idx) {
|
|
179254
|
-
column_stats.push_back(
|
|
179255
|
-
BaseStatistics::CreateEmpty(column_definitions[i].Type(), StatisticsType::GLOBAL_STATS));
|
|
179261
|
+
column_stats.push_back(ColumnStatistics::CreateEmptyStats(column_definitions[i].Type()));
|
|
179256
179262
|
} else {
|
|
179257
|
-
column_stats.push_back(parent.column_stats[i]->Copy());
|
|
179263
|
+
column_stats.push_back(parent.column_stats[i]);
|
|
179258
179264
|
}
|
|
179259
179265
|
}
|
|
179260
179266
|
|
|
@@ -179285,7 +179291,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_id
|
|
|
179285
179291
|
while (current_row_group) {
|
|
179286
179292
|
auto new_row_group =
|
|
179287
179293
|
current_row_group->AlterType(context, target_type, changed_idx, executor, scan_state, scan_chunk);
|
|
179288
|
-
column_stats[changed_idx]->Merge(*new_row_group->GetStatistics(changed_idx));
|
|
179294
|
+
column_stats[changed_idx]->stats->Merge(*new_row_group->GetStatistics(changed_idx));
|
|
179289
179295
|
row_groups->AppendSegment(move(new_row_group));
|
|
179290
179296
|
current_row_group = (RowGroup *)current_row_group->next.get();
|
|
179291
179297
|
}
|
|
@@ -179778,7 +179784,7 @@ void DataTable::Append(Transaction &transaction, DataChunk &chunk, TableAppendSt
|
|
|
179778
179784
|
// merge the stats
|
|
179779
179785
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
179780
179786
|
for (idx_t i = 0; i < column_definitions.size(); i++) {
|
|
179781
|
-
column_stats[i]->Merge(*current_row_group->GetStatistics(i));
|
|
179787
|
+
column_stats[i]->stats->Merge(*current_row_group->GetStatistics(i));
|
|
179782
179788
|
}
|
|
179783
179789
|
}
|
|
179784
179790
|
state.remaining_append_count -= append_count;
|
|
@@ -179812,7 +179818,7 @@ void DataTable::Append(Transaction &transaction, DataChunk &chunk, TableAppendSt
|
|
|
179812
179818
|
if (type == PhysicalType::LIST || type == PhysicalType::STRUCT) {
|
|
179813
179819
|
continue;
|
|
179814
179820
|
}
|
|
179815
|
-
column_stats[col_idx]->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
|
|
179821
|
+
column_stats[col_idx]->stats->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
|
|
179816
179822
|
}
|
|
179817
179823
|
}
|
|
179818
179824
|
|
|
@@ -180257,7 +180263,7 @@ void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector
|
|
|
180257
180263
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
180258
180264
|
for (idx_t i = 0; i < column_ids.size(); i++) {
|
|
180259
180265
|
auto column_id = column_ids[i];
|
|
180260
|
-
column_stats[column_id]->Merge(*row_group->GetStatistics(column_id));
|
|
180266
|
+
column_stats[column_id]->stats->Merge(*row_group->GetStatistics(column_id));
|
|
180261
180267
|
}
|
|
180262
180268
|
} while (pos < count);
|
|
180263
180269
|
}
|
|
@@ -180290,7 +180296,7 @@ void DataTable::UpdateColumn(TableCatalogEntry &table, ClientContext &context, V
|
|
|
180290
180296
|
row_group->UpdateColumn(transaction, updates, row_ids, column_path);
|
|
180291
180297
|
|
|
180292
180298
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
180293
|
-
column_stats[primary_column_idx]->Merge(*row_group->GetStatistics(primary_column_idx));
|
|
180299
|
+
column_stats[primary_column_idx]->stats->Merge(*row_group->GetStatistics(primary_column_idx));
|
|
180294
180300
|
}
|
|
180295
180301
|
|
|
180296
180302
|
//===--------------------------------------------------------------------===//
|
|
@@ -180375,7 +180381,7 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
|
|
|
180375
180381
|
return nullptr;
|
|
180376
180382
|
}
|
|
180377
180383
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
180378
|
-
return column_stats[column_id]->Copy();
|
|
180384
|
+
return column_stats[column_id]->stats->Copy();
|
|
180379
180385
|
}
|
|
180380
180386
|
|
|
180381
180387
|
//===--------------------------------------------------------------------===//
|
|
@@ -180386,7 +180392,7 @@ BlockPointer DataTable::Checkpoint(TableDataWriter &writer) {
|
|
|
180386
180392
|
// FIXME: we might want to combine adjacent row groups in case they have had deletions...
|
|
180387
180393
|
vector<unique_ptr<BaseStatistics>> global_stats;
|
|
180388
180394
|
for (idx_t i = 0; i < column_definitions.size(); i++) {
|
|
180389
|
-
global_stats.push_back(column_stats[i]->Copy());
|
|
180395
|
+
global_stats.push_back(column_stats[i]->stats->Copy());
|
|
180390
180396
|
}
|
|
180391
180397
|
|
|
180392
180398
|
auto row_group = (RowGroup *)row_groups->GetRootSegment();
|
|
@@ -181852,6 +181858,19 @@ void BaseStatistics::Verify(Vector &vector, idx_t count) const {
|
|
|
181852
181858
|
} // namespace duckdb
|
|
181853
181859
|
|
|
181854
181860
|
|
|
181861
|
+
namespace duckdb {
|
|
181862
|
+
|
|
181863
|
+
ColumnStatistics::ColumnStatistics(unique_ptr<BaseStatistics> stats_p) : stats(move(stats_p)) {
|
|
181864
|
+
}
|
|
181865
|
+
|
|
181866
|
+
shared_ptr<ColumnStatistics> ColumnStatistics::CreateEmptyStats(const LogicalType &type) {
|
|
181867
|
+
auto col_stats = BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS);
|
|
181868
|
+
return make_shared<ColumnStatistics>(move(col_stats));
|
|
181869
|
+
}
|
|
181870
|
+
|
|
181871
|
+
} // namespace duckdb
|
|
181872
|
+
|
|
181873
|
+
|
|
181855
181874
|
|
|
181856
181875
|
|
|
181857
181876
|
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-dev966"
|
|
14
|
+
#define DUCKDB_SOURCE_ID "dbb043b11"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev971"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -11653,7 +11653,6 @@ private:
|
|
|
11653
11653
|
|
|
11654
11654
|
namespace duckdb {
|
|
11655
11655
|
|
|
11656
|
-
class ColumnStatistics;
|
|
11657
11656
|
class DataTable;
|
|
11658
11657
|
struct CreateTableInfo;
|
|
11659
11658
|
struct BoundCreateTableInfo;
|
|
@@ -21370,6 +21369,32 @@ private:
|
|
|
21370
21369
|
|
|
21371
21370
|
|
|
21372
21371
|
|
|
21372
|
+
//===----------------------------------------------------------------------===//
|
|
21373
|
+
// DuckDB
|
|
21374
|
+
//
|
|
21375
|
+
// duckdb/storage/statistics/column_statistics.hpp
|
|
21376
|
+
//
|
|
21377
|
+
//
|
|
21378
|
+
//===----------------------------------------------------------------------===//
|
|
21379
|
+
|
|
21380
|
+
|
|
21381
|
+
|
|
21382
|
+
|
|
21383
|
+
|
|
21384
|
+
namespace duckdb {
|
|
21385
|
+
|
|
21386
|
+
class ColumnStatistics {
|
|
21387
|
+
public:
|
|
21388
|
+
explicit ColumnStatistics(unique_ptr<BaseStatistics> stats_p);
|
|
21389
|
+
|
|
21390
|
+
unique_ptr<BaseStatistics> stats;
|
|
21391
|
+
|
|
21392
|
+
public:
|
|
21393
|
+
static shared_ptr<ColumnStatistics> CreateEmptyStats(const LogicalType &type);
|
|
21394
|
+
};
|
|
21395
|
+
|
|
21396
|
+
} // namespace duckdb
|
|
21397
|
+
|
|
21373
21398
|
|
|
21374
21399
|
|
|
21375
21400
|
|
|
@@ -21604,7 +21629,7 @@ private:
|
|
|
21604
21629
|
//! The segment trees holding the various row_groups of the table
|
|
21605
21630
|
shared_ptr<SegmentTree> row_groups;
|
|
21606
21631
|
//! Column statistics
|
|
21607
|
-
vector<unique_ptr<BaseStatistics>> column_stats;
|
|
21632
|
+
vector<shared_ptr<ColumnStatistics>> column_stats;
|
|
21608
21633
|
//! The statistics lock
|
|
21609
21634
|
mutex stats_lock;
|
|
21610
21635
|
//! Whether or not the data table is the root DataTable for this table; the root DataTable is the newest version
|