duckdb 0.8.2-dev3204.0 → 0.8.2-dev3244.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/src/common/types/value.cpp +14 -0
- package/src/duckdb/src/common/types/vector.cpp +9 -3
- package/src/duckdb/src/common/types.cpp +18 -1
- package/src/duckdb/src/core_functions/aggregate/nested/histogram.cpp +1 -0
- package/src/duckdb/src/core_functions/scalar/map/map_entries.cpp +2 -2
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +24 -27
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/main/settings.hpp +2 -10
- package/src/duckdb/src/main/config.cpp +0 -1
- package/src/duckdb/src/main/settings/settings.cpp +0 -15
- package/src/duckdb/src/storage/compression/fsst.cpp +1 -1
- package/src/duckdb/src/storage/data_table.cpp +35 -24
package/package.json
CHANGED
@@ -663,6 +663,20 @@ Value Value::MAP(const LogicalType &child_type, vector<Value> values) {
|
|
663
663
|
|
664
664
|
result.type_ = LogicalType::MAP(child_type);
|
665
665
|
result.is_null = false;
|
666
|
+
for (auto &val : values) {
|
667
|
+
D_ASSERT(val.type().InternalType() == PhysicalType::STRUCT);
|
668
|
+
auto &children = StructValue::GetChildren(val);
|
669
|
+
|
670
|
+
// Ensure that the field containing the keys is called 'key'
|
671
|
+
// and that the field containing the values is called 'value'
|
672
|
+
// this is required to make equality checks work
|
673
|
+
D_ASSERT(children.size() == 2);
|
674
|
+
child_list_t<Value> new_children;
|
675
|
+
new_children.reserve(2);
|
676
|
+
new_children.push_back(std::make_pair("key", children[0]));
|
677
|
+
new_children.push_back(std::make_pair("value", children[1]));
|
678
|
+
val = Value::STRUCT(std::move(new_children));
|
679
|
+
}
|
666
680
|
result.value_info_ = make_shared<NestedValueInfo>(std::move(values));
|
667
681
|
return result;
|
668
682
|
}
|
@@ -1286,6 +1286,11 @@ void Vector::UTFVerify(idx_t count) {
|
|
1286
1286
|
void Vector::VerifyMap(Vector &vector_p, const SelectionVector &sel_p, idx_t count) {
|
1287
1287
|
#ifdef DEBUG
|
1288
1288
|
D_ASSERT(vector_p.GetType().id() == LogicalTypeId::MAP);
|
1289
|
+
auto &child = ListType::GetChildType(vector_p.GetType());
|
1290
|
+
D_ASSERT(StructType::GetChildCount(child) == 2);
|
1291
|
+
D_ASSERT(StructType::GetChildName(child, 0) == "key");
|
1292
|
+
D_ASSERT(StructType::GetChildName(child, 1) == "value");
|
1293
|
+
|
1289
1294
|
auto valid_check = MapVector::CheckMapValidity(vector_p, count, sel_p);
|
1290
1295
|
D_ASSERT(valid_check == MapInvalidReason::VALID);
|
1291
1296
|
#endif // DEBUG
|
@@ -1410,9 +1415,6 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
|
|
1410
1415
|
}
|
1411
1416
|
}
|
1412
1417
|
}
|
1413
|
-
if (vector->GetType().id() == LogicalTypeId::MAP) {
|
1414
|
-
VerifyMap(*vector, *sel, count);
|
1415
|
-
}
|
1416
1418
|
|
1417
1419
|
if (vector->GetType().id() == LogicalTypeId::UNION) {
|
1418
1420
|
VerifyUnion(*vector, *sel, count);
|
@@ -1460,6 +1462,10 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
|
|
1460
1462
|
}
|
1461
1463
|
Vector::Verify(child, child_sel, child_count);
|
1462
1464
|
}
|
1465
|
+
|
1466
|
+
if (vector->GetType().id() == LogicalTypeId::MAP) {
|
1467
|
+
VerifyMap(*vector, *sel, count);
|
1468
|
+
}
|
1463
1469
|
}
|
1464
1470
|
#endif
|
1465
1471
|
}
|
@@ -927,7 +927,24 @@ LogicalType LogicalType::AGGREGATE_STATE(aggregate_state_t state_type) { // NOLI
|
|
927
927
|
//===--------------------------------------------------------------------===//
|
928
928
|
// Map Type
|
929
929
|
//===--------------------------------------------------------------------===//
|
930
|
-
LogicalType LogicalType::MAP(const LogicalType &
|
930
|
+
LogicalType LogicalType::MAP(const LogicalType &child_p) {
|
931
|
+
D_ASSERT(child_p.id() == LogicalTypeId::STRUCT);
|
932
|
+
auto &children = StructType::GetChildTypes(child_p);
|
933
|
+
D_ASSERT(children.size() == 2);
|
934
|
+
|
935
|
+
// We do this to enforce that for every MAP created, the keys are called "key"
|
936
|
+
// and the values are called "value"
|
937
|
+
|
938
|
+
// This is done because for Vector the keys of the STRUCT are used in equality checks.
|
939
|
+
// Vector::Reference will throw if the types don't match
|
940
|
+
child_list_t<LogicalType> new_children(2);
|
941
|
+
new_children[0] = children[0];
|
942
|
+
new_children[0].first = "key";
|
943
|
+
|
944
|
+
new_children[1] = children[1];
|
945
|
+
new_children[1].first = "value";
|
946
|
+
|
947
|
+
auto child = LogicalType::STRUCT(std::move(new_children));
|
931
948
|
auto info = make_shared<ListTypeInfo>(child);
|
932
949
|
return LogicalType(LogicalTypeId::MAP, std::move(info));
|
933
950
|
}
|
@@ -142,6 +142,7 @@ static void HistogramFinalizeFunction(Vector &state_vector, AggregateInputData &
|
|
142
142
|
list_struct_data[rid].offset = old_len;
|
143
143
|
old_len += list_struct_data[rid].length;
|
144
144
|
}
|
145
|
+
result.Verify(count);
|
145
146
|
}
|
146
147
|
|
147
148
|
unique_ptr<FunctionData> HistogramBindFunction(ClientContext &context, AggregateFunction &function,
|
@@ -42,8 +42,8 @@ static unique_ptr<FunctionData> MapEntriesBind(ClientContext &context, ScalarFun
|
|
42
42
|
auto &key_type = MapType::KeyType(map);
|
43
43
|
auto &value_type = MapType::ValueType(map);
|
44
44
|
|
45
|
-
child_types.push_back(make_pair("
|
46
|
-
child_types.push_back(make_pair("
|
45
|
+
child_types.push_back(make_pair("key", key_type));
|
46
|
+
child_types.push_back(make_pair("value", value_type));
|
47
47
|
|
48
48
|
auto row_type = LogicalType::STRUCT(child_types);
|
49
49
|
|
@@ -308,40 +308,37 @@ OperatorResultType PhysicalUnnest::ExecuteInternal(ExecutionContext &context, Da
|
|
308
308
|
// UNNEST(NULL)
|
309
309
|
chunk.SetCardinality(0);
|
310
310
|
break;
|
311
|
+
}
|
311
312
|
|
312
|
-
|
313
|
-
|
314
|
-
auto &vector_data = state.list_vector_data[col_idx];
|
315
|
-
auto current_idx = vector_data.sel->get_index(state.current_row);
|
316
|
-
|
317
|
-
if (!vector_data.validity.RowIsValid(current_idx)) {
|
318
|
-
UnnestNull(0, this_chunk_len, result_vector);
|
313
|
+
auto &vector_data = state.list_vector_data[col_idx];
|
314
|
+
auto current_idx = vector_data.sel->get_index(state.current_row);
|
319
315
|
|
320
|
-
|
316
|
+
if (!vector_data.validity.RowIsValid(current_idx)) {
|
317
|
+
UnnestNull(0, this_chunk_len, result_vector);
|
318
|
+
continue;
|
319
|
+
}
|
321
320
|
|
322
|
-
|
323
|
-
|
321
|
+
auto list_data = UnifiedVectorFormat::GetData<list_entry_t>(vector_data);
|
322
|
+
auto list_entry = list_data[current_idx];
|
324
323
|
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
324
|
+
idx_t list_count = 0;
|
325
|
+
if (state.list_position < list_entry.length) {
|
326
|
+
// there are still list_count elements to unnest
|
327
|
+
list_count = MinValue<idx_t>(this_chunk_len, list_entry.length - state.list_position);
|
329
328
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
329
|
+
auto &list_vector = state.list_data.data[col_idx];
|
330
|
+
auto &child_vector = ListVector::GetEntry(list_vector);
|
331
|
+
auto list_size = ListVector::GetListSize(list_vector);
|
332
|
+
auto &child_vector_data = state.list_child_data[col_idx];
|
334
333
|
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
334
|
+
auto base_offset = list_entry.offset + state.list_position;
|
335
|
+
UnnestVector(child_vector_data, child_vector, list_size, base_offset, base_offset + list_count,
|
336
|
+
result_vector);
|
337
|
+
}
|
339
338
|
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
}
|
344
|
-
}
|
339
|
+
// fill the rest with NULLs
|
340
|
+
if (list_count != this_chunk_len) {
|
341
|
+
UnnestNull(list_count, this_chunk_len, result_vector);
|
345
342
|
}
|
346
343
|
}
|
347
344
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
2
|
-
#define DUCKDB_VERSION "0.8.2-
|
2
|
+
#define DUCKDB_VERSION "0.8.2-dev3244"
|
3
3
|
#endif
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
5
|
+
#define DUCKDB_SOURCE_ID "fb10d8eede"
|
6
6
|
#endif
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
8
8
|
#include "duckdb/main/database.hpp"
|
@@ -68,7 +68,7 @@ struct DebugForceNoCrossProduct {
|
|
68
68
|
struct OrderedAggregateThreshold {
|
69
69
|
static constexpr const char *Name = "ordered_aggregate_threshold"; // NOLINT
|
70
70
|
static constexpr const char *Description = // NOLINT
|
71
|
-
"
|
71
|
+
"The number of rows to accumulate before sorting, used for tuning";
|
72
72
|
static constexpr const LogicalTypeId InputType = LogicalTypeId::UBIGINT; // NOLINT
|
73
73
|
static void SetLocal(ClientContext &context, const Value &parameter);
|
74
74
|
static void ResetLocal(ClientContext &context);
|
@@ -225,6 +225,7 @@ struct EnableProgressBarSetting {
|
|
225
225
|
static void ResetLocal(ClientContext &context);
|
226
226
|
static Value GetSetting(ClientContext &context);
|
227
227
|
};
|
228
|
+
|
228
229
|
struct EnableProgressBarPrintSetting {
|
229
230
|
static constexpr const char *Name = "enable_progress_bar_print";
|
230
231
|
static constexpr const char *Description =
|
@@ -235,15 +236,6 @@ struct EnableProgressBarPrintSetting {
|
|
235
236
|
static Value GetSetting(ClientContext &context);
|
236
237
|
};
|
237
238
|
|
238
|
-
struct ExperimentalParallelCSVSetting {
|
239
|
-
static constexpr const char *Name = "experimental_parallel_csv";
|
240
|
-
static constexpr const char *Description = "Whether or not to use the experimental parallel CSV reader";
|
241
|
-
static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
|
242
|
-
static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
|
243
|
-
static void ResetGlobal(DatabaseInstance *db, DBConfig &config);
|
244
|
-
static Value GetSetting(ClientContext &context);
|
245
|
-
};
|
246
|
-
|
247
239
|
struct ExplainOutputSetting {
|
248
240
|
static constexpr const char *Name = "explain_output";
|
249
241
|
static constexpr const char *Description = "Output of EXPLAIN statements (ALL, OPTIMIZED_ONLY, PHYSICAL_ONLY)";
|
@@ -75,7 +75,6 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
|
|
75
75
|
DUCKDB_LOCAL(EnableProfilingSetting),
|
76
76
|
DUCKDB_LOCAL(EnableProgressBarSetting),
|
77
77
|
DUCKDB_LOCAL(EnableProgressBarPrintSetting),
|
78
|
-
DUCKDB_GLOBAL(ExperimentalParallelCSVSetting),
|
79
78
|
DUCKDB_LOCAL(ExplainOutputSetting),
|
80
79
|
DUCKDB_GLOBAL(ExtensionDirectorySetting),
|
81
80
|
DUCKDB_GLOBAL(ExternalThreadsSetting),
|
@@ -564,21 +564,6 @@ Value EnableProgressBarPrintSetting::GetSetting(ClientContext &context) {
|
|
564
564
|
return Value::BOOLEAN(ClientConfig::GetConfig(context).print_progress_bar);
|
565
565
|
}
|
566
566
|
|
567
|
-
//===--------------------------------------------------------------------===//
|
568
|
-
// Experimental Parallel CSV
|
569
|
-
//===--------------------------------------------------------------------===//
|
570
|
-
void ExperimentalParallelCSVSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
|
571
|
-
Printer::Print("experimental_parallel_csv is deprecated and will be removed with the next release - the parallel "
|
572
|
-
"CSV reader is now standard and does not need to be manually enabled anymore 1");
|
573
|
-
}
|
574
|
-
|
575
|
-
void ExperimentalParallelCSVSetting::ResetGlobal(DatabaseInstance *db, DBConfig &config) {
|
576
|
-
}
|
577
|
-
|
578
|
-
Value ExperimentalParallelCSVSetting::GetSetting(ClientContext &context) {
|
579
|
-
return Value();
|
580
|
-
}
|
581
|
-
|
582
567
|
//===--------------------------------------------------------------------===//
|
583
568
|
// Explain Output
|
584
569
|
//===--------------------------------------------------------------------===//
|
@@ -461,7 +461,7 @@ void FSSTStorage::Compress(CompressionState &state_p, Vector &scan_vector, idx_t
|
|
461
461
|
&sizes_in[0], /* IN: byte-lengths of the inputs */
|
462
462
|
&strings_in[0], /* IN: input string start pointers. */
|
463
463
|
compress_buffer_size, /* IN: byte-length of output buffer. */
|
464
|
-
&compress_buffer[0], /* OUT:
|
464
|
+
&compress_buffer[0], /* OUT: memory buffer to put the compressed strings in (one after the other). */
|
465
465
|
&sizes_out[0], /* OUT: byte-lengths of the compressed strings. */
|
466
466
|
&strings_out[0] /* OUT: output string start pointers. Will all point into [output,output+size). */
|
467
467
|
);
|
@@ -1073,20 +1073,19 @@ void DataTable::VerifyUpdateConstraints(ClientContext &context, TableCatalogEntr
|
|
1073
1073
|
const vector<PhysicalIndex> &column_ids) {
|
1074
1074
|
auto &constraints = table.GetConstraints();
|
1075
1075
|
auto &bound_constraints = table.GetBoundConstraints();
|
1076
|
-
for (idx_t
|
1077
|
-
auto &base_constraint = constraints[
|
1078
|
-
auto &constraint = bound_constraints[
|
1076
|
+
for (idx_t constr_idx = 0; constr_idx < bound_constraints.size(); constr_idx++) {
|
1077
|
+
auto &base_constraint = constraints[constr_idx];
|
1078
|
+
auto &constraint = bound_constraints[constr_idx];
|
1079
1079
|
switch (constraint->type) {
|
1080
1080
|
case ConstraintType::NOT_NULL: {
|
1081
1081
|
auto &bound_not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
|
1082
1082
|
auto &not_null = *reinterpret_cast<NotNullConstraint *>(base_constraint.get());
|
1083
1083
|
// check if the constraint is in the list of column_ids
|
1084
|
-
|
1085
|
-
|
1086
|
-
if (column_ids[i] == bound_not_null.index) {
|
1084
|
+
for (idx_t col_idx = 0; col_idx < column_ids.size(); col_idx++) {
|
1085
|
+
if (column_ids[col_idx] == bound_not_null.index) {
|
1087
1086
|
// found the column id: check the data in
|
1088
1087
|
auto &col = table.GetColumn(LogicalIndex(not_null.index));
|
1089
|
-
VerifyNotNullConstraint(table, chunk.data[
|
1088
|
+
VerifyNotNullConstraint(table, chunk.data[col_idx], chunk.size(), col.Name());
|
1090
1089
|
break;
|
1091
1090
|
}
|
1092
1091
|
}
|
@@ -1122,10 +1121,10 @@ void DataTable::VerifyUpdateConstraints(ClientContext &context, TableCatalogEntr
|
|
1122
1121
|
void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector &row_ids,
|
1123
1122
|
const vector<PhysicalIndex> &column_ids, DataChunk &updates) {
|
1124
1123
|
D_ASSERT(row_ids.GetType().InternalType() == ROW_TYPE);
|
1125
|
-
|
1126
1124
|
D_ASSERT(column_ids.size() == updates.ColumnCount());
|
1127
|
-
auto count = updates.size();
|
1128
1125
|
updates.Verify();
|
1126
|
+
|
1127
|
+
auto count = updates.size();
|
1129
1128
|
if (count == 0) {
|
1130
1129
|
return;
|
1131
1130
|
}
|
@@ -1138,24 +1137,36 @@ void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector
|
|
1138
1137
|
VerifyUpdateConstraints(context, table, updates, column_ids);
|
1139
1138
|
|
1140
1139
|
// now perform the actual update
|
1141
|
-
|
1140
|
+
Vector max_row_id_vec(Value::BIGINT(MAX_ROW_ID));
|
1141
|
+
Vector row_ids_slice(LogicalType::BIGINT);
|
1142
|
+
DataChunk updates_slice;
|
1143
|
+
updates_slice.InitializeEmpty(updates.GetTypes());
|
1142
1144
|
|
1143
|
-
|
1144
|
-
row_ids
|
1145
|
-
|
1146
|
-
auto
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1145
|
+
SelectionVector sel_local_update(count), sel_global_update(count);
|
1146
|
+
auto n_local_update = VectorOperations::GreaterThanEquals(row_ids, max_row_id_vec, nullptr, count,
|
1147
|
+
&sel_local_update, &sel_global_update);
|
1148
|
+
auto n_global_update = count - n_local_update;
|
1149
|
+
|
1150
|
+
// row id > MAX_ROW_ID? transaction-local storage
|
1151
|
+
if (n_local_update > 0) {
|
1152
|
+
updates_slice.Slice(updates, sel_local_update, n_local_update);
|
1153
|
+
updates_slice.Flatten();
|
1154
|
+
row_ids_slice.Slice(row_ids, sel_local_update, n_local_update);
|
1155
|
+
row_ids_slice.Flatten(n_local_update);
|
1156
|
+
|
1157
|
+
LocalStorage::Get(context, db).Update(*this, row_ids_slice, column_ids, updates_slice);
|
1152
1158
|
}
|
1153
1159
|
|
1154
|
-
//
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1160
|
+
// otherwise global storage
|
1161
|
+
if (n_global_update > 0) {
|
1162
|
+
updates_slice.Slice(updates, sel_global_update, n_global_update);
|
1163
|
+
updates_slice.Flatten();
|
1164
|
+
row_ids_slice.Slice(row_ids, sel_global_update, n_global_update);
|
1165
|
+
row_ids_slice.Flatten(n_global_update);
|
1166
|
+
|
1167
|
+
row_groups->Update(DuckTransaction::Get(context, db), FlatVector::GetData<row_t>(row_ids_slice), column_ids,
|
1168
|
+
updates_slice);
|
1169
|
+
}
|
1159
1170
|
}
|
1160
1171
|
|
1161
1172
|
void DataTable::UpdateColumn(TableCatalogEntry &table, ClientContext &context, Vector &row_ids,
|