duckdb 0.3.5-dev966.0 → 0.3.5-dev983.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +53 -22
- package/src/duckdb.hpp +29 -4
- package/src/parquet-amalgamation.cpp +29418 -29418
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -297,7 +297,6 @@ public:
|
|
|
297
297
|
|
|
298
298
|
namespace duckdb {
|
|
299
299
|
|
|
300
|
-
class ColumnStatistics;
|
|
301
300
|
class DataTable;
|
|
302
301
|
struct CreateViewInfo;
|
|
303
302
|
|
|
@@ -1531,7 +1530,7 @@ public:
|
|
|
1531
1530
|
}
|
|
1532
1531
|
|
|
1533
1532
|
template <class T, class RETURN_TYPE = unique_ptr<T>, typename... ARGS>
|
|
1534
|
-
RETURN_TYPE ReadSerializable(RETURN_TYPE default_value, ARGS &&...
|
|
1533
|
+
RETURN_TYPE ReadSerializable(RETURN_TYPE default_value, ARGS &&...args) {
|
|
1535
1534
|
if (field_count >= max_field_count) {
|
|
1536
1535
|
// field is not there, read the default value
|
|
1537
1536
|
return default_value;
|
|
@@ -1553,7 +1552,7 @@ public:
|
|
|
1553
1552
|
}
|
|
1554
1553
|
|
|
1555
1554
|
template <class T, class RETURN_TYPE = unique_ptr<T>, typename... ARGS>
|
|
1556
|
-
RETURN_TYPE ReadRequiredSerializable(ARGS &&...
|
|
1555
|
+
RETURN_TYPE ReadRequiredSerializable(ARGS &&...args) {
|
|
1557
1556
|
if (field_count >= max_field_count) {
|
|
1558
1557
|
// field is not there, read the default value
|
|
1559
1558
|
throw SerializationException("Attempting to read mandatory field, but field is missing");
|
|
@@ -3594,10 +3593,13 @@ TableCatalogEntry::TableCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schem
|
|
|
3594
3593
|
column_dependency_manager(move(info->column_dependency_manager)) {
|
|
3595
3594
|
this->temporary = info->Base().temporary;
|
|
3596
3595
|
// add lower case aliases
|
|
3596
|
+
this->name_map = move(info->name_map);
|
|
3597
|
+
#ifdef DEBUG
|
|
3598
|
+
D_ASSERT(name_map.size() == columns.size());
|
|
3597
3599
|
for (idx_t i = 0; i < columns.size(); i++) {
|
|
3598
|
-
D_ASSERT(name_map
|
|
3599
|
-
name_map[columns[i].Name()] = i;
|
|
3600
|
+
D_ASSERT(name_map[columns[i].Name()] == i);
|
|
3600
3601
|
}
|
|
3602
|
+
#endif
|
|
3601
3603
|
// add the "rowid" alias, if there is no rowid column specified in the table
|
|
3602
3604
|
if (name_map.find("rowid") == name_map.end()) {
|
|
3603
3605
|
name_map["rowid"] = COLUMN_IDENTIFIER_ROW_ID;
|
|
@@ -98694,6 +98696,9 @@ void AddFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
98694
98696
|
functions.AddFunction(ListConcatFun::GetFunction());
|
|
98695
98697
|
|
|
98696
98698
|
set.AddFunction(functions);
|
|
98699
|
+
|
|
98700
|
+
functions.name = "add";
|
|
98701
|
+
set.AddFunction(functions);
|
|
98697
98702
|
}
|
|
98698
98703
|
|
|
98699
98704
|
//===--------------------------------------------------------------------===//
|
|
@@ -98912,6 +98917,9 @@ void SubtractFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
98912
98917
|
// we can negate intervals
|
|
98913
98918
|
functions.AddFunction(GetFunction(LogicalType::INTERVAL));
|
|
98914
98919
|
set.AddFunction(functions);
|
|
98920
|
+
|
|
98921
|
+
functions.name = "subtract";
|
|
98922
|
+
set.AddFunction(functions);
|
|
98915
98923
|
}
|
|
98916
98924
|
|
|
98917
98925
|
//===--------------------------------------------------------------------===//
|
|
@@ -99044,6 +99052,9 @@ void MultiplyFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
99044
99052
|
ScalarFunction({LogicalType::BIGINT, LogicalType::INTERVAL}, LogicalType::INTERVAL,
|
|
99045
99053
|
ScalarFunction::BinaryFunction<int64_t, interval_t, interval_t, MultiplyOperator>, true));
|
|
99046
99054
|
set.AddFunction(functions);
|
|
99055
|
+
|
|
99056
|
+
functions.name = "multiply";
|
|
99057
|
+
set.AddFunction(functions);
|
|
99047
99058
|
}
|
|
99048
99059
|
|
|
99049
99060
|
//===--------------------------------------------------------------------===//
|
|
@@ -99165,6 +99176,9 @@ void DivideFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
99165
99176
|
BinaryScalarFunctionIgnoreZero<interval_t, int64_t, interval_t, DivideOperator>));
|
|
99166
99177
|
|
|
99167
99178
|
set.AddFunction(functions);
|
|
99179
|
+
|
|
99180
|
+
functions.name = "divide";
|
|
99181
|
+
set.AddFunction(functions);
|
|
99168
99182
|
}
|
|
99169
99183
|
|
|
99170
99184
|
//===--------------------------------------------------------------------===//
|
|
@@ -165243,7 +165257,9 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
|
|
|
165243
165257
|
if (column.Generated()) {
|
|
165244
165258
|
continue;
|
|
165245
165259
|
}
|
|
165246
|
-
|
|
165260
|
+
if (column.Type().id() == LogicalTypeId::VARCHAR) {
|
|
165261
|
+
ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
|
|
165262
|
+
}
|
|
165247
165263
|
BindLogicalType(context, column.TypeMutable());
|
|
165248
165264
|
if (column.Type().id() == LogicalTypeId::ENUM) {
|
|
165249
165265
|
// We add a catalog dependency
|
|
@@ -179096,7 +179112,10 @@ DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &t
|
|
|
179096
179112
|
}
|
|
179097
179113
|
row_groups->AppendSegment(move(new_row_group));
|
|
179098
179114
|
}
|
|
179099
|
-
column_stats
|
|
179115
|
+
column_stats.reserve(data->column_stats.size());
|
|
179116
|
+
for (auto &stats : data->column_stats) {
|
|
179117
|
+
column_stats.push_back(make_shared<ColumnStatistics>(move(stats)));
|
|
179118
|
+
}
|
|
179100
179119
|
if (column_stats.size() != types.size()) { // LCOV_EXCL_START
|
|
179101
179120
|
throw IOException("Table statistics column count is not aligned with table column count. Corrupt file?");
|
|
179102
179121
|
} // LCOV_EXCL_STOP
|
|
@@ -179106,7 +179125,7 @@ DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &t
|
|
|
179106
179125
|
|
|
179107
179126
|
AppendRowGroup(0);
|
|
179108
179127
|
for (auto &type : types) {
|
|
179109
|
-
column_stats.push_back(
|
|
179128
|
+
column_stats.push_back(ColumnStatistics::CreateEmptyStats(type));
|
|
179110
179129
|
}
|
|
179111
179130
|
} else {
|
|
179112
179131
|
D_ASSERT(column_stats.size() == types.size());
|
|
@@ -179134,9 +179153,9 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition
|
|
|
179134
179153
|
|
|
179135
179154
|
// set up the statistics
|
|
179136
179155
|
for (idx_t i = 0; i < parent.column_stats.size(); i++) {
|
|
179137
|
-
column_stats.push_back(parent.column_stats[i]
|
|
179156
|
+
column_stats.push_back(parent.column_stats[i]);
|
|
179138
179157
|
}
|
|
179139
|
-
column_stats.push_back(
|
|
179158
|
+
column_stats.push_back(ColumnStatistics::CreateEmptyStats(new_column_type));
|
|
179140
179159
|
|
|
179141
179160
|
// add the column definitions from this DataTable
|
|
179142
179161
|
column_definitions.emplace_back(new_column.Copy());
|
|
@@ -179159,7 +179178,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition
|
|
|
179159
179178
|
while (current_row_group) {
|
|
179160
179179
|
auto new_row_group = current_row_group->AddColumn(context, new_column, executor, default_value, result);
|
|
179161
179180
|
// merge in the statistics
|
|
179162
|
-
column_stats[new_column_idx]->Merge(*new_row_group->GetStatistics(new_column_idx));
|
|
179181
|
+
column_stats[new_column_idx]->stats->Merge(*new_row_group->GetStatistics(new_column_idx));
|
|
179163
179182
|
|
|
179164
179183
|
row_groups->AppendSegment(move(new_row_group));
|
|
179165
179184
|
current_row_group = (RowGroup *)current_row_group->next.get();
|
|
@@ -179195,7 +179214,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
|
|
|
179195
179214
|
// erase the stats from this DataTable
|
|
179196
179215
|
for (idx_t i = 0; i < parent.column_stats.size(); i++) {
|
|
179197
179216
|
if (i != removed_column) {
|
|
179198
|
-
column_stats.push_back(parent.column_stats[i]
|
|
179217
|
+
column_stats.push_back(parent.column_stats[i]);
|
|
179199
179218
|
}
|
|
179200
179219
|
}
|
|
179201
179220
|
|
|
@@ -179251,10 +179270,9 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_id
|
|
|
179251
179270
|
// the column that had its type changed will have the new statistics computed during conversion
|
|
179252
179271
|
for (idx_t i = 0; i < column_definitions.size(); i++) {
|
|
179253
179272
|
if (i == changed_idx) {
|
|
179254
|
-
column_stats.push_back(
|
|
179255
|
-
BaseStatistics::CreateEmpty(column_definitions[i].Type(), StatisticsType::GLOBAL_STATS));
|
|
179273
|
+
column_stats.push_back(ColumnStatistics::CreateEmptyStats(column_definitions[i].Type()));
|
|
179256
179274
|
} else {
|
|
179257
|
-
column_stats.push_back(parent.column_stats[i]
|
|
179275
|
+
column_stats.push_back(parent.column_stats[i]);
|
|
179258
179276
|
}
|
|
179259
179277
|
}
|
|
179260
179278
|
|
|
@@ -179285,7 +179303,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_id
|
|
|
179285
179303
|
while (current_row_group) {
|
|
179286
179304
|
auto new_row_group =
|
|
179287
179305
|
current_row_group->AlterType(context, target_type, changed_idx, executor, scan_state, scan_chunk);
|
|
179288
|
-
column_stats[changed_idx]->Merge(*new_row_group->GetStatistics(changed_idx));
|
|
179306
|
+
column_stats[changed_idx]->stats->Merge(*new_row_group->GetStatistics(changed_idx));
|
|
179289
179307
|
row_groups->AppendSegment(move(new_row_group));
|
|
179290
179308
|
current_row_group = (RowGroup *)current_row_group->next.get();
|
|
179291
179309
|
}
|
|
@@ -179778,7 +179796,7 @@ void DataTable::Append(Transaction &transaction, DataChunk &chunk, TableAppendSt
|
|
|
179778
179796
|
// merge the stats
|
|
179779
179797
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
179780
179798
|
for (idx_t i = 0; i < column_definitions.size(); i++) {
|
|
179781
|
-
column_stats[i]->Merge(*current_row_group->GetStatistics(i));
|
|
179799
|
+
column_stats[i]->stats->Merge(*current_row_group->GetStatistics(i));
|
|
179782
179800
|
}
|
|
179783
179801
|
}
|
|
179784
179802
|
state.remaining_append_count -= append_count;
|
|
@@ -179812,7 +179830,7 @@ void DataTable::Append(Transaction &transaction, DataChunk &chunk, TableAppendSt
|
|
|
179812
179830
|
if (type == PhysicalType::LIST || type == PhysicalType::STRUCT) {
|
|
179813
179831
|
continue;
|
|
179814
179832
|
}
|
|
179815
|
-
column_stats[col_idx]->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
|
|
179833
|
+
column_stats[col_idx]->stats->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
|
|
179816
179834
|
}
|
|
179817
179835
|
}
|
|
179818
179836
|
|
|
@@ -180257,7 +180275,7 @@ void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector
|
|
|
180257
180275
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
180258
180276
|
for (idx_t i = 0; i < column_ids.size(); i++) {
|
|
180259
180277
|
auto column_id = column_ids[i];
|
|
180260
|
-
column_stats[column_id]->Merge(*row_group->GetStatistics(column_id));
|
|
180278
|
+
column_stats[column_id]->stats->Merge(*row_group->GetStatistics(column_id));
|
|
180261
180279
|
}
|
|
180262
180280
|
} while (pos < count);
|
|
180263
180281
|
}
|
|
@@ -180290,7 +180308,7 @@ void DataTable::UpdateColumn(TableCatalogEntry &table, ClientContext &context, V
|
|
|
180290
180308
|
row_group->UpdateColumn(transaction, updates, row_ids, column_path);
|
|
180291
180309
|
|
|
180292
180310
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
180293
|
-
column_stats[primary_column_idx]->Merge(*row_group->GetStatistics(primary_column_idx));
|
|
180311
|
+
column_stats[primary_column_idx]->stats->Merge(*row_group->GetStatistics(primary_column_idx));
|
|
180294
180312
|
}
|
|
180295
180313
|
|
|
180296
180314
|
//===--------------------------------------------------------------------===//
|
|
@@ -180375,7 +180393,7 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
|
|
|
180375
180393
|
return nullptr;
|
|
180376
180394
|
}
|
|
180377
180395
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
180378
|
-
return column_stats[column_id]->Copy();
|
|
180396
|
+
return column_stats[column_id]->stats->Copy();
|
|
180379
180397
|
}
|
|
180380
180398
|
|
|
180381
180399
|
//===--------------------------------------------------------------------===//
|
|
@@ -180386,7 +180404,7 @@ BlockPointer DataTable::Checkpoint(TableDataWriter &writer) {
|
|
|
180386
180404
|
// FIXME: we might want to combine adjacent row groups in case they have had deletions...
|
|
180387
180405
|
vector<unique_ptr<BaseStatistics>> global_stats;
|
|
180388
180406
|
for (idx_t i = 0; i < column_definitions.size(); i++) {
|
|
180389
|
-
global_stats.push_back(column_stats[i]->Copy());
|
|
180407
|
+
global_stats.push_back(column_stats[i]->stats->Copy());
|
|
180390
180408
|
}
|
|
180391
180409
|
|
|
180392
180410
|
auto row_group = (RowGroup *)row_groups->GetRootSegment();
|
|
@@ -181852,6 +181870,19 @@ void BaseStatistics::Verify(Vector &vector, idx_t count) const {
|
|
|
181852
181870
|
} // namespace duckdb
|
|
181853
181871
|
|
|
181854
181872
|
|
|
181873
|
+
namespace duckdb {
|
|
181874
|
+
|
|
181875
|
+
ColumnStatistics::ColumnStatistics(unique_ptr<BaseStatistics> stats_p) : stats(move(stats_p)) {
|
|
181876
|
+
}
|
|
181877
|
+
|
|
181878
|
+
shared_ptr<ColumnStatistics> ColumnStatistics::CreateEmptyStats(const LogicalType &type) {
|
|
181879
|
+
auto col_stats = BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS);
|
|
181880
|
+
return make_shared<ColumnStatistics>(move(col_stats));
|
|
181881
|
+
}
|
|
181882
|
+
|
|
181883
|
+
} // namespace duckdb
|
|
181884
|
+
|
|
181885
|
+
|
|
181855
181886
|
|
|
181856
181887
|
|
|
181857
181888
|
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "8b86bff5a"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev983"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -11653,7 +11653,6 @@ private:
|
|
|
11653
11653
|
|
|
11654
11654
|
namespace duckdb {
|
|
11655
11655
|
|
|
11656
|
-
class ColumnStatistics;
|
|
11657
11656
|
class DataTable;
|
|
11658
11657
|
struct CreateTableInfo;
|
|
11659
11658
|
struct BoundCreateTableInfo;
|
|
@@ -21370,6 +21369,32 @@ private:
|
|
|
21370
21369
|
|
|
21371
21370
|
|
|
21372
21371
|
|
|
21372
|
+
//===----------------------------------------------------------------------===//
|
|
21373
|
+
// DuckDB
|
|
21374
|
+
//
|
|
21375
|
+
// duckdb/storage/statistics/column_statistics.hpp
|
|
21376
|
+
//
|
|
21377
|
+
//
|
|
21378
|
+
//===----------------------------------------------------------------------===//
|
|
21379
|
+
|
|
21380
|
+
|
|
21381
|
+
|
|
21382
|
+
|
|
21383
|
+
|
|
21384
|
+
namespace duckdb {
|
|
21385
|
+
|
|
21386
|
+
class ColumnStatistics {
|
|
21387
|
+
public:
|
|
21388
|
+
explicit ColumnStatistics(unique_ptr<BaseStatistics> stats_p);
|
|
21389
|
+
|
|
21390
|
+
unique_ptr<BaseStatistics> stats;
|
|
21391
|
+
|
|
21392
|
+
public:
|
|
21393
|
+
static shared_ptr<ColumnStatistics> CreateEmptyStats(const LogicalType &type);
|
|
21394
|
+
};
|
|
21395
|
+
|
|
21396
|
+
} // namespace duckdb
|
|
21397
|
+
|
|
21373
21398
|
|
|
21374
21399
|
|
|
21375
21400
|
|
|
@@ -21604,7 +21629,7 @@ private:
|
|
|
21604
21629
|
//! The segment trees holding the various row_groups of the table
|
|
21605
21630
|
shared_ptr<SegmentTree> row_groups;
|
|
21606
21631
|
//! Column statistics
|
|
21607
|
-
vector<
|
|
21632
|
+
vector<shared_ptr<ColumnStatistics>> column_stats;
|
|
21608
21633
|
//! The statistics lock
|
|
21609
21634
|
mutex stats_lock;
|
|
21610
21635
|
//! Whether or not the data table is the root DataTable for this table; the root DataTable is the newest version
|