duckdb 0.3.5-dev964.0 → 0.3.5-dev974.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +61 -20
- package/src/duckdb.hpp +29 -4
- package/src/parquet-amalgamation.cpp +36192 -36192
package/package.json
CHANGED
package/src/duckdb.cpp
CHANGED
|
@@ -297,7 +297,6 @@ public:
|
|
|
297
297
|
|
|
298
298
|
namespace duckdb {
|
|
299
299
|
|
|
300
|
-
class ColumnStatistics;
|
|
301
300
|
class DataTable;
|
|
302
301
|
struct CreateViewInfo;
|
|
303
302
|
|
|
@@ -3594,10 +3593,13 @@ TableCatalogEntry::TableCatalogEntry(Catalog *catalog, SchemaCatalogEntry *schem
|
|
|
3594
3593
|
column_dependency_manager(move(info->column_dependency_manager)) {
|
|
3595
3594
|
this->temporary = info->Base().temporary;
|
|
3596
3595
|
// add lower case aliases
|
|
3596
|
+
this->name_map = move(info->name_map);
|
|
3597
|
+
#ifdef DEBUG
|
|
3598
|
+
D_ASSERT(name_map.size() == columns.size());
|
|
3597
3599
|
for (idx_t i = 0; i < columns.size(); i++) {
|
|
3598
|
-
D_ASSERT(name_map
|
|
3599
|
-
name_map[columns[i].Name()] = i;
|
|
3600
|
+
D_ASSERT(name_map[columns[i].Name()] == i);
|
|
3600
3601
|
}
|
|
3602
|
+
#endif
|
|
3601
3603
|
// add the "rowid" alias, if there is no rowid column specified in the table
|
|
3602
3604
|
if (name_map.find("rowid") == name_map.end()) {
|
|
3603
3605
|
name_map["rowid"] = COLUMN_IDENTIFIER_ROW_ID;
|
|
@@ -97509,12 +97511,28 @@ struct IsInfiniteOperator {
|
|
|
97509
97511
|
}
|
|
97510
97512
|
};
|
|
97511
97513
|
|
|
97514
|
+
template <>
|
|
97515
|
+
bool IsInfiniteOperator::Operation(date_t input) {
|
|
97516
|
+
return !Value::IsFinite(input);
|
|
97517
|
+
}
|
|
97518
|
+
|
|
97519
|
+
template <>
|
|
97520
|
+
bool IsInfiniteOperator::Operation(timestamp_t input) {
|
|
97521
|
+
return !Value::IsFinite(input);
|
|
97522
|
+
}
|
|
97523
|
+
|
|
97512
97524
|
void IsInfiniteFun::RegisterFunction(BuiltinFunctions &set) {
|
|
97513
97525
|
ScalarFunctionSet funcs("isinf");
|
|
97514
97526
|
funcs.AddFunction(ScalarFunction({LogicalType::FLOAT}, LogicalType::BOOLEAN,
|
|
97515
97527
|
ScalarFunction::UnaryFunction<float, bool, IsInfiniteOperator>));
|
|
97516
97528
|
funcs.AddFunction(ScalarFunction({LogicalType::DOUBLE}, LogicalType::BOOLEAN,
|
|
97517
97529
|
ScalarFunction::UnaryFunction<double, bool, IsInfiniteOperator>));
|
|
97530
|
+
funcs.AddFunction(ScalarFunction({LogicalType::DATE}, LogicalType::BOOLEAN,
|
|
97531
|
+
ScalarFunction::UnaryFunction<date_t, bool, IsInfiniteOperator>));
|
|
97532
|
+
funcs.AddFunction(ScalarFunction({LogicalType::TIMESTAMP}, LogicalType::BOOLEAN,
|
|
97533
|
+
ScalarFunction::UnaryFunction<timestamp_t, bool, IsInfiniteOperator>));
|
|
97534
|
+
funcs.AddFunction(ScalarFunction({LogicalType::TIMESTAMP_TZ}, LogicalType::BOOLEAN,
|
|
97535
|
+
ScalarFunction::UnaryFunction<timestamp_t, bool, IsInfiniteOperator>));
|
|
97518
97536
|
set.AddFunction(funcs);
|
|
97519
97537
|
}
|
|
97520
97538
|
|
|
@@ -97534,6 +97552,12 @@ void IsFiniteFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
97534
97552
|
ScalarFunction::UnaryFunction<float, bool, IsFiniteOperator>));
|
|
97535
97553
|
funcs.AddFunction(ScalarFunction({LogicalType::DOUBLE}, LogicalType::BOOLEAN,
|
|
97536
97554
|
ScalarFunction::UnaryFunction<double, bool, IsFiniteOperator>));
|
|
97555
|
+
funcs.AddFunction(ScalarFunction({LogicalType::DATE}, LogicalType::BOOLEAN,
|
|
97556
|
+
ScalarFunction::UnaryFunction<date_t, bool, IsFiniteOperator>));
|
|
97557
|
+
funcs.AddFunction(ScalarFunction({LogicalType::TIMESTAMP}, LogicalType::BOOLEAN,
|
|
97558
|
+
ScalarFunction::UnaryFunction<timestamp_t, bool, IsFiniteOperator>));
|
|
97559
|
+
funcs.AddFunction(ScalarFunction({LogicalType::TIMESTAMP_TZ}, LogicalType::BOOLEAN,
|
|
97560
|
+
ScalarFunction::UnaryFunction<timestamp_t, bool, IsFiniteOperator>));
|
|
97537
97561
|
set.AddFunction(funcs);
|
|
97538
97562
|
}
|
|
97539
97563
|
|
|
@@ -165221,7 +165245,9 @@ unique_ptr<BoundCreateTableInfo> Binder::BindCreateTableInfo(unique_ptr<CreateIn
|
|
|
165221
165245
|
if (column.Generated()) {
|
|
165222
165246
|
continue;
|
|
165223
165247
|
}
|
|
165224
|
-
|
|
165248
|
+
if (column.Type().id() == LogicalTypeId::VARCHAR) {
|
|
165249
|
+
ExpressionBinder::TestCollation(context, StringType::GetCollation(column.Type()));
|
|
165250
|
+
}
|
|
165225
165251
|
BindLogicalType(context, column.TypeMutable());
|
|
165226
165252
|
if (column.Type().id() == LogicalTypeId::ENUM) {
|
|
165227
165253
|
// We add a catalog dependency
|
|
@@ -179074,7 +179100,10 @@ DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &t
|
|
|
179074
179100
|
}
|
|
179075
179101
|
row_groups->AppendSegment(move(new_row_group));
|
|
179076
179102
|
}
|
|
179077
|
-
column_stats
|
|
179103
|
+
column_stats.reserve(data->column_stats.size());
|
|
179104
|
+
for (auto &stats : data->column_stats) {
|
|
179105
|
+
column_stats.push_back(make_shared<ColumnStatistics>(move(stats)));
|
|
179106
|
+
}
|
|
179078
179107
|
if (column_stats.size() != types.size()) { // LCOV_EXCL_START
|
|
179079
179108
|
throw IOException("Table statistics column count is not aligned with table column count. Corrupt file?");
|
|
179080
179109
|
} // LCOV_EXCL_STOP
|
|
@@ -179084,7 +179113,7 @@ DataTable::DataTable(DatabaseInstance &db, const string &schema, const string &t
|
|
|
179084
179113
|
|
|
179085
179114
|
AppendRowGroup(0);
|
|
179086
179115
|
for (auto &type : types) {
|
|
179087
|
-
column_stats.push_back(
|
|
179116
|
+
column_stats.push_back(ColumnStatistics::CreateEmptyStats(type));
|
|
179088
179117
|
}
|
|
179089
179118
|
} else {
|
|
179090
179119
|
D_ASSERT(column_stats.size() == types.size());
|
|
@@ -179112,9 +179141,9 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition
|
|
|
179112
179141
|
|
|
179113
179142
|
// set up the statistics
|
|
179114
179143
|
for (idx_t i = 0; i < parent.column_stats.size(); i++) {
|
|
179115
|
-
column_stats.push_back(parent.column_stats[i]
|
|
179144
|
+
column_stats.push_back(parent.column_stats[i]);
|
|
179116
179145
|
}
|
|
179117
|
-
column_stats.push_back(
|
|
179146
|
+
column_stats.push_back(ColumnStatistics::CreateEmptyStats(new_column_type));
|
|
179118
179147
|
|
|
179119
179148
|
// add the column definitions from this DataTable
|
|
179120
179149
|
column_definitions.emplace_back(new_column.Copy());
|
|
@@ -179137,7 +179166,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, ColumnDefinition
|
|
|
179137
179166
|
while (current_row_group) {
|
|
179138
179167
|
auto new_row_group = current_row_group->AddColumn(context, new_column, executor, default_value, result);
|
|
179139
179168
|
// merge in the statistics
|
|
179140
|
-
column_stats[new_column_idx]->Merge(*new_row_group->GetStatistics(new_column_idx));
|
|
179169
|
+
column_stats[new_column_idx]->stats->Merge(*new_row_group->GetStatistics(new_column_idx));
|
|
179141
179170
|
|
|
179142
179171
|
row_groups->AppendSegment(move(new_row_group));
|
|
179143
179172
|
current_row_group = (RowGroup *)current_row_group->next.get();
|
|
@@ -179173,7 +179202,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t removed_co
|
|
|
179173
179202
|
// erase the stats from this DataTable
|
|
179174
179203
|
for (idx_t i = 0; i < parent.column_stats.size(); i++) {
|
|
179175
179204
|
if (i != removed_column) {
|
|
179176
|
-
column_stats.push_back(parent.column_stats[i]
|
|
179205
|
+
column_stats.push_back(parent.column_stats[i]);
|
|
179177
179206
|
}
|
|
179178
179207
|
}
|
|
179179
179208
|
|
|
@@ -179229,10 +179258,9 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_id
|
|
|
179229
179258
|
// the column that had its type changed will have the new statistics computed during conversion
|
|
179230
179259
|
for (idx_t i = 0; i < column_definitions.size(); i++) {
|
|
179231
179260
|
if (i == changed_idx) {
|
|
179232
|
-
column_stats.push_back(
|
|
179233
|
-
BaseStatistics::CreateEmpty(column_definitions[i].Type(), StatisticsType::GLOBAL_STATS));
|
|
179261
|
+
column_stats.push_back(ColumnStatistics::CreateEmptyStats(column_definitions[i].Type()));
|
|
179234
179262
|
} else {
|
|
179235
|
-
column_stats.push_back(parent.column_stats[i]
|
|
179263
|
+
column_stats.push_back(parent.column_stats[i]);
|
|
179236
179264
|
}
|
|
179237
179265
|
}
|
|
179238
179266
|
|
|
@@ -179263,7 +179291,7 @@ DataTable::DataTable(ClientContext &context, DataTable &parent, idx_t changed_id
|
|
|
179263
179291
|
while (current_row_group) {
|
|
179264
179292
|
auto new_row_group =
|
|
179265
179293
|
current_row_group->AlterType(context, target_type, changed_idx, executor, scan_state, scan_chunk);
|
|
179266
|
-
column_stats[changed_idx]->Merge(*new_row_group->GetStatistics(changed_idx));
|
|
179294
|
+
column_stats[changed_idx]->stats->Merge(*new_row_group->GetStatistics(changed_idx));
|
|
179267
179295
|
row_groups->AppendSegment(move(new_row_group));
|
|
179268
179296
|
current_row_group = (RowGroup *)current_row_group->next.get();
|
|
179269
179297
|
}
|
|
@@ -179756,7 +179784,7 @@ void DataTable::Append(Transaction &transaction, DataChunk &chunk, TableAppendSt
|
|
|
179756
179784
|
// merge the stats
|
|
179757
179785
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
179758
179786
|
for (idx_t i = 0; i < column_definitions.size(); i++) {
|
|
179759
|
-
column_stats[i]->Merge(*current_row_group->GetStatistics(i));
|
|
179787
|
+
column_stats[i]->stats->Merge(*current_row_group->GetStatistics(i));
|
|
179760
179788
|
}
|
|
179761
179789
|
}
|
|
179762
179790
|
state.remaining_append_count -= append_count;
|
|
@@ -179790,7 +179818,7 @@ void DataTable::Append(Transaction &transaction, DataChunk &chunk, TableAppendSt
|
|
|
179790
179818
|
if (type == PhysicalType::LIST || type == PhysicalType::STRUCT) {
|
|
179791
179819
|
continue;
|
|
179792
179820
|
}
|
|
179793
|
-
column_stats[col_idx]->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
|
|
179821
|
+
column_stats[col_idx]->stats->UpdateDistinctStatistics(chunk.data[col_idx], chunk.size());
|
|
179794
179822
|
}
|
|
179795
179823
|
}
|
|
179796
179824
|
|
|
@@ -180235,7 +180263,7 @@ void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector
|
|
|
180235
180263
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
180236
180264
|
for (idx_t i = 0; i < column_ids.size(); i++) {
|
|
180237
180265
|
auto column_id = column_ids[i];
|
|
180238
|
-
column_stats[column_id]->Merge(*row_group->GetStatistics(column_id));
|
|
180266
|
+
column_stats[column_id]->stats->Merge(*row_group->GetStatistics(column_id));
|
|
180239
180267
|
}
|
|
180240
180268
|
} while (pos < count);
|
|
180241
180269
|
}
|
|
@@ -180268,7 +180296,7 @@ void DataTable::UpdateColumn(TableCatalogEntry &table, ClientContext &context, V
|
|
|
180268
180296
|
row_group->UpdateColumn(transaction, updates, row_ids, column_path);
|
|
180269
180297
|
|
|
180270
180298
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
180271
|
-
column_stats[primary_column_idx]->Merge(*row_group->GetStatistics(primary_column_idx));
|
|
180299
|
+
column_stats[primary_column_idx]->stats->Merge(*row_group->GetStatistics(primary_column_idx));
|
|
180272
180300
|
}
|
|
180273
180301
|
|
|
180274
180302
|
//===--------------------------------------------------------------------===//
|
|
@@ -180353,7 +180381,7 @@ unique_ptr<BaseStatistics> DataTable::GetStatistics(ClientContext &context, colu
|
|
|
180353
180381
|
return nullptr;
|
|
180354
180382
|
}
|
|
180355
180383
|
lock_guard<mutex> stats_guard(stats_lock);
|
|
180356
|
-
return column_stats[column_id]->Copy();
|
|
180384
|
+
return column_stats[column_id]->stats->Copy();
|
|
180357
180385
|
}
|
|
180358
180386
|
|
|
180359
180387
|
//===--------------------------------------------------------------------===//
|
|
@@ -180364,7 +180392,7 @@ BlockPointer DataTable::Checkpoint(TableDataWriter &writer) {
|
|
|
180364
180392
|
// FIXME: we might want to combine adjacent row groups in case they have had deletions...
|
|
180365
180393
|
vector<unique_ptr<BaseStatistics>> global_stats;
|
|
180366
180394
|
for (idx_t i = 0; i < column_definitions.size(); i++) {
|
|
180367
|
-
global_stats.push_back(column_stats[i]->Copy());
|
|
180395
|
+
global_stats.push_back(column_stats[i]->stats->Copy());
|
|
180368
180396
|
}
|
|
180369
180397
|
|
|
180370
180398
|
auto row_group = (RowGroup *)row_groups->GetRootSegment();
|
|
@@ -181830,6 +181858,19 @@ void BaseStatistics::Verify(Vector &vector, idx_t count) const {
|
|
|
181830
181858
|
} // namespace duckdb
|
|
181831
181859
|
|
|
181832
181860
|
|
|
181861
|
+
namespace duckdb {
|
|
181862
|
+
|
|
181863
|
+
ColumnStatistics::ColumnStatistics(unique_ptr<BaseStatistics> stats_p) : stats(move(stats_p)) {
|
|
181864
|
+
}
|
|
181865
|
+
|
|
181866
|
+
shared_ptr<ColumnStatistics> ColumnStatistics::CreateEmptyStats(const LogicalType &type) {
|
|
181867
|
+
auto col_stats = BaseStatistics::CreateEmpty(type, StatisticsType::GLOBAL_STATS);
|
|
181868
|
+
return make_shared<ColumnStatistics>(move(col_stats));
|
|
181869
|
+
}
|
|
181870
|
+
|
|
181871
|
+
} // namespace duckdb
|
|
181872
|
+
|
|
181873
|
+
|
|
181833
181874
|
|
|
181834
181875
|
|
|
181835
181876
|
|
package/src/duckdb.hpp
CHANGED
|
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
|
|
|
11
11
|
#pragma once
|
|
12
12
|
#define DUCKDB_AMALGAMATION 1
|
|
13
13
|
#define DUCKDB_AMALGAMATION_EXTENDED 1
|
|
14
|
-
#define DUCKDB_SOURCE_ID "
|
|
15
|
-
#define DUCKDB_VERSION "v0.3.5-
|
|
14
|
+
#define DUCKDB_SOURCE_ID "05499e7c7"
|
|
15
|
+
#define DUCKDB_VERSION "v0.3.5-dev974"
|
|
16
16
|
//===----------------------------------------------------------------------===//
|
|
17
17
|
// DuckDB
|
|
18
18
|
//
|
|
@@ -11653,7 +11653,6 @@ private:
|
|
|
11653
11653
|
|
|
11654
11654
|
namespace duckdb {
|
|
11655
11655
|
|
|
11656
|
-
class ColumnStatistics;
|
|
11657
11656
|
class DataTable;
|
|
11658
11657
|
struct CreateTableInfo;
|
|
11659
11658
|
struct BoundCreateTableInfo;
|
|
@@ -21370,6 +21369,32 @@ private:
|
|
|
21370
21369
|
|
|
21371
21370
|
|
|
21372
21371
|
|
|
21372
|
+
//===----------------------------------------------------------------------===//
|
|
21373
|
+
// DuckDB
|
|
21374
|
+
//
|
|
21375
|
+
// duckdb/storage/statistics/column_statistics.hpp
|
|
21376
|
+
//
|
|
21377
|
+
//
|
|
21378
|
+
//===----------------------------------------------------------------------===//
|
|
21379
|
+
|
|
21380
|
+
|
|
21381
|
+
|
|
21382
|
+
|
|
21383
|
+
|
|
21384
|
+
namespace duckdb {
|
|
21385
|
+
|
|
21386
|
+
class ColumnStatistics {
|
|
21387
|
+
public:
|
|
21388
|
+
explicit ColumnStatistics(unique_ptr<BaseStatistics> stats_p);
|
|
21389
|
+
|
|
21390
|
+
unique_ptr<BaseStatistics> stats;
|
|
21391
|
+
|
|
21392
|
+
public:
|
|
21393
|
+
static shared_ptr<ColumnStatistics> CreateEmptyStats(const LogicalType &type);
|
|
21394
|
+
};
|
|
21395
|
+
|
|
21396
|
+
} // namespace duckdb
|
|
21397
|
+
|
|
21373
21398
|
|
|
21374
21399
|
|
|
21375
21400
|
|
|
@@ -21604,7 +21629,7 @@ private:
|
|
|
21604
21629
|
//! The segment trees holding the various row_groups of the table
|
|
21605
21630
|
shared_ptr<SegmentTree> row_groups;
|
|
21606
21631
|
//! Column statistics
|
|
21607
|
-
vector<
|
|
21632
|
+
vector<shared_ptr<ColumnStatistics>> column_stats;
|
|
21608
21633
|
//! The statistics lock
|
|
21609
21634
|
mutex stats_lock;
|
|
21610
21635
|
//! Whether or not the data table is the root DataTable for this table; the root DataTable is the newest version
|