duckdb 0.6.2-dev919.0 → 0.6.2-dev942.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +6 -12
- package/src/duckdb/extension/parquet/column_writer.cpp +6 -5
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -8
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -15
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -5
- package/src/duckdb/src/common/row_operations/row_gather.cpp +0 -1
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -1
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +0 -2
- package/src/duckdb/src/common/sort/sort_state.cpp +0 -2
- package/src/duckdb/src/common/types/value.cpp +6 -8
- package/src/duckdb/src/common/types/vector.cpp +40 -43
- package/src/duckdb/src/common/types.cpp +20 -25
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +0 -1
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +0 -2
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +0 -2
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +0 -1
- package/src/duckdb/src/execution/window_segment_tree.cpp +0 -17
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +22 -0
- package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +0 -1
- package/src/duckdb/src/function/aggregate/nested/histogram.cpp +7 -24
- package/src/duckdb/src/function/cast/list_casts.cpp +3 -3
- package/src/duckdb/src/function/cast/map_cast.cpp +19 -60
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +0 -1
- package/src/duckdb/src/function/scalar/map/cardinality.cpp +1 -4
- package/src/duckdb/src/function/scalar/map/map.cpp +26 -38
- package/src/duckdb/src/function/scalar/map/map_extract.cpp +7 -6
- package/src/duckdb/src/function/scalar/map/map_from_entries.cpp +2 -117
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +1 -1
- package/src/duckdb/src/function/table/arrow.cpp +6 -10
- package/src/duckdb/src/function/table/arrow_conversion.cpp +2 -55
- package/src/duckdb/src/function/table/system/test_all_types.cpp +14 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -3
- package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +4 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +3 -2
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +1 -8
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +11 -9
- package/src/duckdb/src/planner/expression_binder.cpp +6 -6
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
package/package.json
CHANGED
|
@@ -271,27 +271,22 @@ static void CreateValuesStruct(const JSONCreateFunctionData &info, yyjson_mut_do
|
|
|
271
271
|
|
|
272
272
|
static void CreateValuesMap(const JSONCreateFunctionData &info, yyjson_mut_doc *doc, yyjson_mut_val *vals[],
|
|
273
273
|
Vector &value_v, idx_t count) {
|
|
274
|
-
auto &entries = StructVector::GetEntries(value_v);
|
|
275
274
|
// Create nested keys
|
|
276
|
-
auto &
|
|
277
|
-
auto
|
|
278
|
-
auto map_key_count = ListVector::GetListSize(map_key_list_v);
|
|
275
|
+
auto &map_key_v = MapVector::GetKeys(value_v);
|
|
276
|
+
auto map_key_count = ListVector::GetListSize(value_v);
|
|
279
277
|
auto nested_keys_ptr = unique_ptr<yyjson_mut_val *[]>(new yyjson_mut_val *[map_key_count]);
|
|
280
278
|
auto nested_keys = nested_keys_ptr.get();
|
|
281
279
|
TemplatedCreateValues<string_t>(doc, nested_keys, map_key_v, map_key_count);
|
|
282
280
|
// Create nested values
|
|
283
|
-
auto &
|
|
284
|
-
auto
|
|
285
|
-
auto map_val_count = ListVector::GetListSize(map_val_list_v);
|
|
281
|
+
auto &map_val_v = MapVector::GetValues(value_v);
|
|
282
|
+
auto map_val_count = ListVector::GetListSize(value_v);
|
|
286
283
|
auto nested_vals_ptr = unique_ptr<yyjson_mut_val *[]>(new yyjson_mut_val *[map_val_count]);
|
|
287
284
|
auto nested_vals = nested_vals_ptr.get();
|
|
288
285
|
CreateValues(info, doc, nested_vals, map_val_v, map_val_count);
|
|
289
286
|
// Add the key/value pairs to the objects
|
|
290
287
|
UnifiedVectorFormat map_data;
|
|
291
288
|
value_v.ToUnifiedFormat(count, map_data);
|
|
292
|
-
|
|
293
|
-
map_key_list_v.ToUnifiedFormat(map_key_count, map_key_list_data);
|
|
294
|
-
auto map_key_list_entries = (list_entry_t *)map_key_list_data.data;
|
|
289
|
+
auto map_key_list_entries = (list_entry_t *)map_data.data;
|
|
295
290
|
for (idx_t i = 0; i < count; i++) {
|
|
296
291
|
idx_t idx = map_data.sel->get_index(i);
|
|
297
292
|
if (!map_data.validity.RowIsValid(idx)) {
|
|
@@ -299,8 +294,7 @@ static void CreateValuesMap(const JSONCreateFunctionData &info, yyjson_mut_doc *
|
|
|
299
294
|
vals[i] = yyjson_mut_null(doc);
|
|
300
295
|
} else {
|
|
301
296
|
vals[i] = yyjson_mut_obj(doc);
|
|
302
|
-
|
|
303
|
-
const auto &key_list_entry = map_key_list_entries[key_idx];
|
|
297
|
+
const auto &key_list_entry = map_key_list_entries[idx];
|
|
304
298
|
for (idx_t child_i = key_list_entry.offset; child_i < key_list_entry.offset + key_list_entry.length;
|
|
305
299
|
child_i++) {
|
|
306
300
|
if (!unsafe_yyjson_is_null(nested_keys[child_i])) {
|
|
@@ -1908,12 +1908,13 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
|
|
|
1908
1908
|
bool is_key = i == 0;
|
|
1909
1909
|
auto child_writer = CreateWriterRecursive(schemas, writer, kv_types[i], kv_names[i], schema_path,
|
|
1910
1910
|
max_repeat + 1, max_define + 2, !is_key);
|
|
1911
|
-
|
|
1912
|
-
|
|
1913
|
-
child_writers.push_back(move(list_writer));
|
|
1911
|
+
|
|
1912
|
+
child_writers.push_back(move(child_writer));
|
|
1914
1913
|
}
|
|
1915
|
-
|
|
1916
|
-
|
|
1914
|
+
auto struct_writer = make_unique<StructColumnWriter>(writer, schema_idx, schema_path, max_repeat, max_define,
|
|
1915
|
+
move(child_writers), can_have_nulls);
|
|
1916
|
+
return make_unique<ListColumnWriter>(writer, schema_idx, schema_path, max_repeat, max_define,
|
|
1917
|
+
move(struct_writer), can_have_nulls);
|
|
1917
1918
|
}
|
|
1918
1919
|
duckdb_parquet::format::SchemaElement schema_element;
|
|
1919
1920
|
schema_element.type = ParquetWriter::DuckDBTypeToParquetType(type);
|
|
@@ -302,14 +302,12 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(const FileMetaData
|
|
|
302
302
|
throw IOException("MAP_KEY_VALUE needs to be repeated");
|
|
303
303
|
}
|
|
304
304
|
result_type = LogicalType::MAP(move(child_types[0].second), move(child_types[1].second));
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
max_repeat - 1, move(child_readers));
|
|
312
|
-
return result;
|
|
305
|
+
|
|
306
|
+
auto struct_reader =
|
|
307
|
+
make_unique<StructColumnReader>(*this, ListType::GetChildType(result_type), s_ele, this_idx,
|
|
308
|
+
max_define - 1, max_repeat - 1, move(child_readers));
|
|
309
|
+
return make_unique<ListColumnReader>(*this, result_type, s_ele, this_idx, max_define, max_repeat,
|
|
310
|
+
move(struct_reader));
|
|
313
311
|
}
|
|
314
312
|
if (child_types.size() > 1 || (!is_list && !is_map && !is_repeated)) {
|
|
315
313
|
result_type = LogicalType::STRUCT(move(child_types));
|
|
@@ -469,33 +469,21 @@ struct ArrowMapData {
|
|
|
469
469
|
input.ToUnifiedFormat(size, format);
|
|
470
470
|
|
|
471
471
|
AppendValidity(append_data, format, size);
|
|
472
|
-
// maps exist as a struct of two lists, e.g. STRUCT(key VARCHAR[], value VARCHAR[])
|
|
473
|
-
// since both lists are the same, arrow tries to be smart by storing the offsets only once
|
|
474
|
-
// we can append the offsets from any of the two children
|
|
475
|
-
auto &children = StructVector::GetEntries(input);
|
|
476
|
-
|
|
477
|
-
UnifiedVectorFormat child_format;
|
|
478
|
-
children[0]->ToUnifiedFormat(size, child_format);
|
|
479
472
|
vector<sel_t> child_indices;
|
|
480
|
-
AppendListOffsets(append_data,
|
|
473
|
+
AppendListOffsets(append_data, format, size, child_indices);
|
|
481
474
|
|
|
482
|
-
// now we can append the children to the lists
|
|
483
|
-
auto &struct_entries = StructVector::GetEntries(input);
|
|
484
|
-
D_ASSERT(struct_entries.size() == 2);
|
|
485
475
|
SelectionVector child_sel(child_indices.data());
|
|
486
|
-
auto &key_vector =
|
|
487
|
-
auto &value_vector =
|
|
476
|
+
auto &key_vector = MapVector::GetKeys(input);
|
|
477
|
+
auto &value_vector = MapVector::GetValues(input);
|
|
488
478
|
auto list_size = child_indices.size();
|
|
489
479
|
key_vector.Slice(child_sel, list_size);
|
|
490
480
|
value_vector.Slice(child_sel, list_size);
|
|
491
481
|
|
|
492
|
-
// perform the append
|
|
493
482
|
auto &struct_data = *append_data.child_data[0];
|
|
494
483
|
auto &key_data = *struct_data.child_data[0];
|
|
495
484
|
auto &value_data = *struct_data.child_data[1];
|
|
496
485
|
key_data.append_vector(key_data, key_vector, list_size);
|
|
497
486
|
value_data.append_vector(value_data, value_vector, list_size);
|
|
498
|
-
|
|
499
487
|
append_data.row_count += size;
|
|
500
488
|
struct_data.row_count += size;
|
|
501
489
|
}
|
|
@@ -73,11 +73,7 @@ void SetArrowMapFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child,
|
|
|
73
73
|
InitializeChild(root_holder.nested_children.back()[0]);
|
|
74
74
|
child.children = &root_holder.nested_children_ptr.back()[0];
|
|
75
75
|
child.children[0]->name = "entries";
|
|
76
|
-
|
|
77
|
-
struct_child_types.push_back(std::make_pair("key", ListType::GetChildType(StructType::GetChildType(type, 0))));
|
|
78
|
-
struct_child_types.push_back(std::make_pair("value", ListType::GetChildType(StructType::GetChildType(type, 1))));
|
|
79
|
-
auto struct_type = LogicalType::STRUCT(move(struct_child_types));
|
|
80
|
-
SetArrowFormat(root_holder, *child.children[0], struct_type, config_timezone);
|
|
76
|
+
SetArrowFormat(root_holder, **child.children, ListType::GetChildType(type), config_timezone);
|
|
81
77
|
}
|
|
82
78
|
|
|
83
79
|
void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, const LogicalType &type,
|
|
@@ -163,7 +163,6 @@ void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector
|
|
|
163
163
|
GatherVarchar(rows, row_sel, col, col_sel, count, layout, col_no, build_size, heap_ptr);
|
|
164
164
|
break;
|
|
165
165
|
case PhysicalType::LIST:
|
|
166
|
-
case PhysicalType::MAP:
|
|
167
166
|
case PhysicalType::STRUCT:
|
|
168
167
|
GatherNestedVector(rows, row_sel, col, col_sel, count, layout, col_no, heap_ptr);
|
|
169
168
|
break;
|
|
@@ -201,7 +201,6 @@ static void TemplatedMatchOp(Vector &vec, UnifiedVectorFormat &col, const RowLay
|
|
|
201
201
|
no_match_count);
|
|
202
202
|
break;
|
|
203
203
|
case PhysicalType::LIST:
|
|
204
|
-
case PhysicalType::MAP:
|
|
205
204
|
case PhysicalType::STRUCT:
|
|
206
205
|
TemplatedMatchNested<OP, NO_MATCH_SEL>(vec, rows, sel, count, layout, col_no, no_match, no_match_count);
|
|
207
206
|
break;
|
|
@@ -143,7 +143,6 @@ void RowOperations::Scatter(DataChunk &columns, UnifiedVectorFormat col_data[],
|
|
|
143
143
|
ComputeStringEntrySizes(col, entry_sizes, sel, count);
|
|
144
144
|
break;
|
|
145
145
|
case PhysicalType::LIST:
|
|
146
|
-
case PhysicalType::MAP:
|
|
147
146
|
case PhysicalType::STRUCT:
|
|
148
147
|
RowOperations::ComputeEntrySizes(vec, col, entry_sizes, vcount, count, sel);
|
|
149
148
|
break;
|
|
@@ -215,7 +214,6 @@ void RowOperations::Scatter(DataChunk &columns, UnifiedVectorFormat col_data[],
|
|
|
215
214
|
ScatterStringVector(col, rows, data_locations, sel, count, col_offset, col_no);
|
|
216
215
|
break;
|
|
217
216
|
case PhysicalType::LIST:
|
|
218
|
-
case PhysicalType::MAP:
|
|
219
217
|
case PhysicalType::STRUCT:
|
|
220
218
|
ScatterNestedVector(vec, col, rows, data_locations, sel, count, col_offset, col_no, vcount);
|
|
221
219
|
break;
|
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
#include "duckdb/common/sort/sorted_block.hpp"
|
|
5
5
|
#include "duckdb/storage/statistics/string_statistics.hpp"
|
|
6
6
|
#include "duckdb/common/radix.hpp"
|
|
7
|
-
|
|
8
7
|
#include <algorithm>
|
|
9
8
|
#include <numeric>
|
|
10
9
|
|
|
@@ -28,7 +27,6 @@ idx_t GetNestedSortingColSize(idx_t &col_size, const LogicalType &type) {
|
|
|
28
27
|
// Lists get 2 bytes (null and empty list)
|
|
29
28
|
col_size += 2;
|
|
30
29
|
return GetNestedSortingColSize(col_size, ListType::GetChildType(type));
|
|
31
|
-
case PhysicalType::MAP:
|
|
32
30
|
case PhysicalType::STRUCT:
|
|
33
31
|
// Structs get 1 bytes (null)
|
|
34
32
|
col_size++;
|
|
@@ -528,17 +528,15 @@ Value Value::STRUCT(child_list_t<Value> values) {
|
|
|
528
528
|
return result;
|
|
529
529
|
}
|
|
530
530
|
|
|
531
|
-
Value Value::MAP(
|
|
531
|
+
Value Value::MAP(LogicalType child_type, vector<Value> values) {
|
|
532
532
|
Value result;
|
|
533
|
-
child_list_t<LogicalType> child_types;
|
|
534
|
-
child_types.push_back({"key", key.type()});
|
|
535
|
-
child_types.push_back({"value", value.type()});
|
|
536
|
-
|
|
537
|
-
result.type_ = LogicalType::MAP(move(child_types));
|
|
538
533
|
|
|
539
|
-
result.
|
|
540
|
-
result.struct_value.push_back(move(value));
|
|
534
|
+
result.type_ = LogicalType::MAP(move(child_type));
|
|
541
535
|
result.is_null = false;
|
|
536
|
+
if (values.empty()) {
|
|
537
|
+
return result;
|
|
538
|
+
}
|
|
539
|
+
result.list_value = move(values);
|
|
542
540
|
return result;
|
|
543
541
|
}
|
|
544
542
|
|
|
@@ -546,10 +546,13 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) {
|
|
|
546
546
|
return Value::BLOB((const_data_ptr_t)str.GetDataUnsafe(), str.GetSize());
|
|
547
547
|
}
|
|
548
548
|
case LogicalTypeId::MAP: {
|
|
549
|
-
auto
|
|
550
|
-
|
|
551
|
-
Value
|
|
552
|
-
|
|
549
|
+
auto offlen = ((list_entry_t *)data)[index];
|
|
550
|
+
auto &child_vec = ListVector::GetEntry(*vector);
|
|
551
|
+
std::vector<Value> children;
|
|
552
|
+
for (idx_t i = offlen.offset; i < offlen.offset + offlen.length; i++) {
|
|
553
|
+
children.push_back(child_vec.GetValue(i));
|
|
554
|
+
}
|
|
555
|
+
return Value::MAP(ListType::GetChildType(type), move(children));
|
|
553
556
|
}
|
|
554
557
|
case LogicalTypeId::UNION: {
|
|
555
558
|
auto tag = UnionVector::GetTag(*vector, index);
|
|
@@ -1511,12 +1514,12 @@ void FSSTVector::DecompressVector(const Vector &src, Vector &dst, idx_t src_offs
|
|
|
1511
1514
|
}
|
|
1512
1515
|
|
|
1513
1516
|
Vector &MapVector::GetKeys(Vector &vector) {
|
|
1514
|
-
auto &entries = StructVector::GetEntries(vector);
|
|
1517
|
+
auto &entries = StructVector::GetEntries(ListVector::GetEntry(vector));
|
|
1515
1518
|
D_ASSERT(entries.size() == 2);
|
|
1516
1519
|
return *entries[0];
|
|
1517
1520
|
}
|
|
1518
1521
|
Vector &MapVector::GetValues(Vector &vector) {
|
|
1519
|
-
auto &entries = StructVector::GetEntries(vector);
|
|
1522
|
+
auto &entries = StructVector::GetEntries(ListVector::GetEntry(vector));
|
|
1520
1523
|
D_ASSERT(entries.size() == 2);
|
|
1521
1524
|
return *entries[1];
|
|
1522
1525
|
}
|
|
@@ -1529,8 +1532,7 @@ const Vector &MapVector::GetValues(const Vector &vector) {
|
|
|
1529
1532
|
}
|
|
1530
1533
|
|
|
1531
1534
|
vector<unique_ptr<Vector>> &StructVector::GetEntries(Vector &vector) {
|
|
1532
|
-
D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT || vector.GetType().id() == LogicalTypeId::
|
|
1533
|
-
vector.GetType().id() == LogicalTypeId::UNION);
|
|
1535
|
+
D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT || vector.GetType().id() == LogicalTypeId::UNION);
|
|
1534
1536
|
|
|
1535
1537
|
if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
|
|
1536
1538
|
auto &child = DictionaryVector::Child(vector);
|
|
@@ -1548,7 +1550,7 @@ const vector<unique_ptr<Vector>> &StructVector::GetEntries(const Vector &vector)
|
|
|
1548
1550
|
}
|
|
1549
1551
|
|
|
1550
1552
|
const Vector &ListVector::GetEntry(const Vector &vector) {
|
|
1551
|
-
D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST);
|
|
1553
|
+
D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST || vector.GetType().id() == LogicalTypeId::MAP);
|
|
1552
1554
|
if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
|
|
1553
1555
|
auto &child = DictionaryVector::Child(vector);
|
|
1554
1556
|
return ListVector::GetEntry(child);
|
|
@@ -1566,7 +1568,7 @@ Vector &ListVector::GetEntry(Vector &vector) {
|
|
|
1566
1568
|
}
|
|
1567
1569
|
|
|
1568
1570
|
void ListVector::Reserve(Vector &vector, idx_t required_capacity) {
|
|
1569
|
-
D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST);
|
|
1571
|
+
D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST || vector.GetType().id() == LogicalTypeId::MAP);
|
|
1570
1572
|
D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR ||
|
|
1571
1573
|
vector.GetVectorType() == VectorType::CONSTANT_VECTOR);
|
|
1572
1574
|
D_ASSERT(vector.auxiliary);
|
|
@@ -1576,10 +1578,10 @@ void ListVector::Reserve(Vector &vector, idx_t required_capacity) {
|
|
|
1576
1578
|
}
|
|
1577
1579
|
|
|
1578
1580
|
template <class T>
|
|
1579
|
-
void TemplatedSearchInMap(Vector &
|
|
1580
|
-
|
|
1581
|
+
void TemplatedSearchInMap(Vector &keys, idx_t count, T key, vector<idx_t> &offsets, bool is_key_null, idx_t offset,
|
|
1582
|
+
idx_t length) {
|
|
1581
1583
|
UnifiedVectorFormat vector_data;
|
|
1582
|
-
|
|
1584
|
+
keys.ToUnifiedFormat(count, vector_data);
|
|
1583
1585
|
auto data = (T *)vector_data.data;
|
|
1584
1586
|
auto validity_mask = vector_data.validity;
|
|
1585
1587
|
|
|
@@ -1602,16 +1604,15 @@ void TemplatedSearchInMap(Vector &list, T key, vector<idx_t> &offsets, bool is_k
|
|
|
1602
1604
|
}
|
|
1603
1605
|
|
|
1604
1606
|
template <class T>
|
|
1605
|
-
void TemplatedSearchInMap(Vector &
|
|
1606
|
-
idx_t length) {
|
|
1607
|
-
TemplatedSearchInMap<T>(
|
|
1607
|
+
void TemplatedSearchInMap(Vector &keys, idx_t count, const Value &key, vector<idx_t> &offsets, bool is_key_null,
|
|
1608
|
+
idx_t offset, idx_t length) {
|
|
1609
|
+
TemplatedSearchInMap<T>(keys, count, key.template GetValueUnsafe<T>(), offsets, is_key_null, offset, length);
|
|
1608
1610
|
}
|
|
1609
1611
|
|
|
1610
|
-
void SearchStringInMap(Vector &
|
|
1611
|
-
idx_t length) {
|
|
1612
|
-
auto &list_vector = ListVector::GetEntry(list);
|
|
1612
|
+
void SearchStringInMap(Vector &keys, idx_t count, const string &key, vector<idx_t> &offsets, bool is_key_null,
|
|
1613
|
+
idx_t offset, idx_t length) {
|
|
1613
1614
|
UnifiedVectorFormat vector_data;
|
|
1614
|
-
|
|
1615
|
+
keys.ToUnifiedFormat(count, vector_data);
|
|
1615
1616
|
auto data = (string_t *)vector_data.data;
|
|
1616
1617
|
auto validity_mask = vector_data.validity;
|
|
1617
1618
|
if (is_key_null) {
|
|
@@ -1633,67 +1634,63 @@ void SearchStringInMap(Vector &list, const string &key, vector<idx_t> &offsets,
|
|
|
1633
1634
|
}
|
|
1634
1635
|
}
|
|
1635
1636
|
|
|
1636
|
-
vector<idx_t>
|
|
1637
|
+
vector<idx_t> MapVector::Search(Vector &keys, idx_t count, const Value &key, list_entry_t &entry) {
|
|
1637
1638
|
vector<idx_t> offsets;
|
|
1638
1639
|
|
|
1639
|
-
|
|
1640
|
-
auto &entry = ListVector::GetData(list)[row];
|
|
1641
|
-
|
|
1642
|
-
switch (list_vector.GetType().InternalType()) {
|
|
1640
|
+
switch (keys.GetType().InternalType()) {
|
|
1643
1641
|
case PhysicalType::BOOL:
|
|
1644
1642
|
case PhysicalType::INT8:
|
|
1645
|
-
TemplatedSearchInMap<int8_t>(
|
|
1643
|
+
TemplatedSearchInMap<int8_t>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1646
1644
|
break;
|
|
1647
1645
|
case PhysicalType::INT16:
|
|
1648
|
-
TemplatedSearchInMap<int16_t>(
|
|
1646
|
+
TemplatedSearchInMap<int16_t>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1649
1647
|
break;
|
|
1650
1648
|
case PhysicalType::INT32:
|
|
1651
|
-
TemplatedSearchInMap<int32_t>(
|
|
1649
|
+
TemplatedSearchInMap<int32_t>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1652
1650
|
break;
|
|
1653
1651
|
case PhysicalType::INT64:
|
|
1654
|
-
TemplatedSearchInMap<int64_t>(
|
|
1652
|
+
TemplatedSearchInMap<int64_t>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1655
1653
|
break;
|
|
1656
1654
|
case PhysicalType::INT128:
|
|
1657
|
-
TemplatedSearchInMap<hugeint_t>(
|
|
1655
|
+
TemplatedSearchInMap<hugeint_t>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1658
1656
|
break;
|
|
1659
1657
|
case PhysicalType::UINT8:
|
|
1660
|
-
TemplatedSearchInMap<uint8_t>(
|
|
1658
|
+
TemplatedSearchInMap<uint8_t>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1661
1659
|
break;
|
|
1662
1660
|
case PhysicalType::UINT16:
|
|
1663
|
-
TemplatedSearchInMap<uint16_t>(
|
|
1661
|
+
TemplatedSearchInMap<uint16_t>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1664
1662
|
break;
|
|
1665
1663
|
case PhysicalType::UINT32:
|
|
1666
|
-
TemplatedSearchInMap<uint32_t>(
|
|
1664
|
+
TemplatedSearchInMap<uint32_t>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1667
1665
|
break;
|
|
1668
1666
|
case PhysicalType::UINT64:
|
|
1669
|
-
TemplatedSearchInMap<uint64_t>(
|
|
1667
|
+
TemplatedSearchInMap<uint64_t>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1670
1668
|
break;
|
|
1671
1669
|
case PhysicalType::FLOAT:
|
|
1672
|
-
TemplatedSearchInMap<float>(
|
|
1670
|
+
TemplatedSearchInMap<float>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1673
1671
|
break;
|
|
1674
1672
|
case PhysicalType::DOUBLE:
|
|
1675
|
-
TemplatedSearchInMap<double>(
|
|
1673
|
+
TemplatedSearchInMap<double>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1676
1674
|
break;
|
|
1677
1675
|
case PhysicalType::INTERVAL:
|
|
1678
|
-
TemplatedSearchInMap<interval_t>(
|
|
1676
|
+
TemplatedSearchInMap<interval_t>(keys, count, key, offsets, key.IsNull(), entry.offset, entry.length);
|
|
1679
1677
|
break;
|
|
1680
1678
|
case PhysicalType::VARCHAR:
|
|
1681
|
-
SearchStringInMap(
|
|
1679
|
+
SearchStringInMap(keys, count, StringValue::Get(key), offsets, key.IsNull(), entry.offset, entry.length);
|
|
1682
1680
|
break;
|
|
1683
1681
|
default:
|
|
1684
|
-
throw InvalidTypeException(
|
|
1682
|
+
throw InvalidTypeException(keys.GetType().id(), "Invalid type for List Vector Search");
|
|
1685
1683
|
}
|
|
1686
1684
|
return offsets;
|
|
1687
1685
|
}
|
|
1688
1686
|
|
|
1689
|
-
Value
|
|
1690
|
-
auto &child_vec = ListVector::GetEntry(list);
|
|
1687
|
+
Value FlatVector::GetValuesFromOffsets(Vector &values, vector<idx_t> &offsets) {
|
|
1691
1688
|
vector<Value> list_values;
|
|
1692
1689
|
list_values.reserve(offsets.size());
|
|
1693
1690
|
for (auto &offset : offsets) {
|
|
1694
|
-
list_values.push_back(
|
|
1691
|
+
list_values.push_back(values.GetValue(offset));
|
|
1695
1692
|
}
|
|
1696
|
-
return Value::LIST(
|
|
1693
|
+
return Value::LIST(values.GetType(), move(list_values));
|
|
1697
1694
|
}
|
|
1698
1695
|
|
|
1699
1696
|
idx_t ListVector::GetListSize(const Vector &vec) {
|
|
@@ -107,11 +107,11 @@ PhysicalType LogicalType::GetInternalType() {
|
|
|
107
107
|
return PhysicalType::VARCHAR;
|
|
108
108
|
case LogicalTypeId::INTERVAL:
|
|
109
109
|
return PhysicalType::INTERVAL;
|
|
110
|
-
case LogicalTypeId::MAP:
|
|
111
110
|
case LogicalTypeId::UNION:
|
|
112
111
|
case LogicalTypeId::STRUCT:
|
|
113
112
|
return PhysicalType::STRUCT;
|
|
114
113
|
case LogicalTypeId::LIST:
|
|
114
|
+
case LogicalTypeId::MAP:
|
|
115
115
|
return PhysicalType::LIST;
|
|
116
116
|
case LogicalTypeId::POINTER:
|
|
117
117
|
// LCOV_EXCL_START
|
|
@@ -257,8 +257,6 @@ string TypeIdToString(PhysicalType type) {
|
|
|
257
257
|
return "INVALID";
|
|
258
258
|
case PhysicalType::BIT:
|
|
259
259
|
return "BIT";
|
|
260
|
-
case PhysicalType::MAP:
|
|
261
|
-
return "MAP";
|
|
262
260
|
case PhysicalType::UNKNOWN:
|
|
263
261
|
return "UNKNOWN";
|
|
264
262
|
}
|
|
@@ -443,15 +441,9 @@ string LogicalType::ToString() const {
|
|
|
443
441
|
if (!type_info_) {
|
|
444
442
|
return "MAP";
|
|
445
443
|
}
|
|
446
|
-
auto &
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
}
|
|
450
|
-
if (child_types.size() != 2) {
|
|
451
|
-
throw InternalException("Map needs exactly two child elements");
|
|
452
|
-
}
|
|
453
|
-
return "MAP(" + ListType::GetChildType(child_types[0].second).ToString() + ", " +
|
|
454
|
-
ListType::GetChildType(child_types[1].second).ToString() + ")";
|
|
444
|
+
auto &key_type = MapType::KeyType(*this);
|
|
445
|
+
auto &value_type = MapType::ValueType(*this);
|
|
446
|
+
return "MAP(" + key_type.ToString() + ", " + value_type.ToString() + ")";
|
|
455
447
|
}
|
|
456
448
|
case LogicalTypeId::UNION: {
|
|
457
449
|
if (!type_info_) {
|
|
@@ -747,7 +739,12 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
|
|
|
747
739
|
auto new_child = MaxLogicalType(ListType::GetChildType(left), ListType::GetChildType(right));
|
|
748
740
|
return LogicalType::LIST(move(new_child));
|
|
749
741
|
}
|
|
750
|
-
if (type_id == LogicalTypeId::
|
|
742
|
+
if (type_id == LogicalTypeId::MAP) {
|
|
743
|
+
// list: perform max recursively on child type
|
|
744
|
+
auto new_child = MaxLogicalType(ListType::GetChildType(left), ListType::GetChildType(right));
|
|
745
|
+
return LogicalType::MAP(move(new_child));
|
|
746
|
+
}
|
|
747
|
+
if (type_id == LogicalTypeId::STRUCT) {
|
|
751
748
|
// struct: perform recursively
|
|
752
749
|
auto &left_child_types = StructType::GetChildTypes(left);
|
|
753
750
|
auto &right_child_types = StructType::GetChildTypes(right);
|
|
@@ -762,8 +759,7 @@ LogicalType LogicalType::MaxLogicalType(const LogicalType &left, const LogicalTy
|
|
|
762
759
|
child_types.push_back(make_pair(left_child_types[i].first, move(child_type)));
|
|
763
760
|
}
|
|
764
761
|
|
|
765
|
-
return
|
|
766
|
-
: LogicalType::MAP(move(child_types));
|
|
762
|
+
return LogicalType::STRUCT(move(child_types));
|
|
767
763
|
}
|
|
768
764
|
if (type_id == LogicalTypeId::UNION) {
|
|
769
765
|
auto left_member_count = UnionType::GetMemberCount(left);
|
|
@@ -1040,7 +1036,7 @@ protected:
|
|
|
1040
1036
|
};
|
|
1041
1037
|
|
|
1042
1038
|
const LogicalType &ListType::GetChildType(const LogicalType &type) {
|
|
1043
|
-
D_ASSERT(type.id() == LogicalTypeId::LIST);
|
|
1039
|
+
D_ASSERT(type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::MAP);
|
|
1044
1040
|
auto info = type.AuxInfo();
|
|
1045
1041
|
D_ASSERT(info);
|
|
1046
1042
|
return ((ListTypeInfo &)*info).child_type;
|
|
@@ -1154,8 +1150,7 @@ const string AggregateStateType::GetTypeName(const LogicalType &type) {
|
|
|
1154
1150
|
}
|
|
1155
1151
|
|
|
1156
1152
|
const child_list_t<LogicalType> &StructType::GetChildTypes(const LogicalType &type) {
|
|
1157
|
-
D_ASSERT(type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::
|
|
1158
|
-
type.id() == LogicalTypeId::UNION);
|
|
1153
|
+
D_ASSERT(type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::UNION);
|
|
1159
1154
|
|
|
1160
1155
|
auto info = type.AuxInfo();
|
|
1161
1156
|
D_ASSERT(info);
|
|
@@ -1191,26 +1186,26 @@ LogicalType LogicalType::AGGREGATE_STATE(aggregate_state_t state_type) { // NOLI
|
|
|
1191
1186
|
//===--------------------------------------------------------------------===//
|
|
1192
1187
|
// Map Type
|
|
1193
1188
|
//===--------------------------------------------------------------------===//
|
|
1194
|
-
LogicalType LogicalType::MAP(
|
|
1195
|
-
auto info = make_shared<
|
|
1189
|
+
LogicalType LogicalType::MAP(LogicalType child) {
|
|
1190
|
+
auto info = make_shared<ListTypeInfo>(move(child));
|
|
1196
1191
|
return LogicalType(LogicalTypeId::MAP, move(info));
|
|
1197
1192
|
}
|
|
1198
1193
|
|
|
1199
1194
|
LogicalType LogicalType::MAP(LogicalType key, LogicalType value) {
|
|
1200
1195
|
child_list_t<LogicalType> child_types;
|
|
1201
|
-
child_types.push_back({"key",
|
|
1202
|
-
child_types.push_back({"value",
|
|
1203
|
-
return LogicalType::MAP(move(child_types));
|
|
1196
|
+
child_types.push_back({"key", move(key)});
|
|
1197
|
+
child_types.push_back({"value", move(value)});
|
|
1198
|
+
return LogicalType::MAP(LogicalType::STRUCT(move(child_types)));
|
|
1204
1199
|
}
|
|
1205
1200
|
|
|
1206
1201
|
const LogicalType &MapType::KeyType(const LogicalType &type) {
|
|
1207
1202
|
D_ASSERT(type.id() == LogicalTypeId::MAP);
|
|
1208
|
-
return ListType::GetChildType(
|
|
1203
|
+
return StructType::GetChildTypes(ListType::GetChildType(type))[0].second;
|
|
1209
1204
|
}
|
|
1210
1205
|
|
|
1211
1206
|
const LogicalType &MapType::ValueType(const LogicalType &type) {
|
|
1212
1207
|
D_ASSERT(type.id() == LogicalTypeId::MAP);
|
|
1213
|
-
return ListType::GetChildType(
|
|
1208
|
+
return StructType::GetChildTypes(ListType::GetChildType(type))[1].second;
|
|
1214
1209
|
}
|
|
1215
1210
|
|
|
1216
1211
|
//===--------------------------------------------------------------------===//
|
|
@@ -769,7 +769,6 @@ static void ExecuteDistinct(Vector &left, Vector &right, Vector &result, idx_t c
|
|
|
769
769
|
TemplatedDistinctExecute<string_t, OP>(left, right, result, count);
|
|
770
770
|
break;
|
|
771
771
|
case PhysicalType::LIST:
|
|
772
|
-
case PhysicalType::MAP:
|
|
773
772
|
case PhysicalType::STRUCT:
|
|
774
773
|
NestedDistinctExecute<OP>(left, right, result, count);
|
|
775
774
|
break;
|
|
@@ -810,7 +809,6 @@ static idx_t TemplatedDistinctSelectOperation(Vector &left, Vector &right, const
|
|
|
810
809
|
return DistinctSelect<interval_t, interval_t, OP>(left, right, sel, count, true_sel, false_sel);
|
|
811
810
|
case PhysicalType::VARCHAR:
|
|
812
811
|
return DistinctSelect<string_t, string_t, OP>(left, right, sel, count, true_sel, false_sel);
|
|
813
|
-
case PhysicalType::MAP:
|
|
814
812
|
case PhysicalType::STRUCT:
|
|
815
813
|
case PhysicalType::LIST:
|
|
816
814
|
return DistinctSelectNested<OP, OPNESTED>(left, right, sel, count, true_sel, false_sel);
|
|
@@ -220,7 +220,6 @@ static inline void HashTypeSwitch(Vector &input, Vector &result, const Selection
|
|
|
220
220
|
case PhysicalType::VARCHAR:
|
|
221
221
|
TemplatedLoopHash<HAS_RSEL, string_t>(input, result, rsel, count);
|
|
222
222
|
break;
|
|
223
|
-
case PhysicalType::MAP:
|
|
224
223
|
case PhysicalType::STRUCT:
|
|
225
224
|
StructLoopHash<HAS_RSEL, true>(input, result, rsel, count);
|
|
226
225
|
break;
|
|
@@ -352,7 +351,6 @@ static inline void CombineHashTypeSwitch(Vector &hashes, Vector &input, const Se
|
|
|
352
351
|
case PhysicalType::VARCHAR:
|
|
353
352
|
TemplatedLoopCombineHash<HAS_RSEL, string_t>(input, hashes, rsel, count);
|
|
354
353
|
break;
|
|
355
|
-
case PhysicalType::MAP:
|
|
356
354
|
case PhysicalType::STRUCT:
|
|
357
355
|
StructLoopHash<HAS_RSEL, false>(input, hashes, rsel, count);
|
|
358
356
|
break;
|
|
@@ -94,7 +94,6 @@ static idx_t TemplatedSelectOperation(Vector &left, Vector &right, const Selecti
|
|
|
94
94
|
case PhysicalType::VARCHAR:
|
|
95
95
|
return BinaryExecutor::Select<string_t, string_t, OP>(left, right, sel, count, true_sel, false_sel);
|
|
96
96
|
case PhysicalType::LIST:
|
|
97
|
-
case PhysicalType::MAP:
|
|
98
97
|
case PhysicalType::STRUCT:
|
|
99
98
|
return NestedSelectOperation<OP>(left, right, sel, count, true_sel, false_sel);
|
|
100
99
|
default:
|
|
@@ -167,23 +167,6 @@ void WindowSegmentTree::ConstructTree() {
|
|
|
167
167
|
void WindowSegmentTree::Compute(Vector &result, idx_t rid, idx_t begin, idx_t end) {
|
|
168
168
|
D_ASSERT(input_ref);
|
|
169
169
|
|
|
170
|
-
// No arguments, so just count
|
|
171
|
-
if (inputs.ColumnCount() == 0) {
|
|
172
|
-
D_ASSERT(GetTypeIdSize(result_type.InternalType()) == sizeof(idx_t));
|
|
173
|
-
auto data = FlatVector::GetData<idx_t>(result);
|
|
174
|
-
// Slice to any filtered rows
|
|
175
|
-
if (!filter_mask.AllValid()) {
|
|
176
|
-
idx_t filtered = 0;
|
|
177
|
-
for (idx_t i = begin; i < end; ++i) {
|
|
178
|
-
filtered += filter_mask.RowIsValid(i);
|
|
179
|
-
}
|
|
180
|
-
data[rid] = filtered;
|
|
181
|
-
} else {
|
|
182
|
-
data[rid] = end - begin;
|
|
183
|
-
}
|
|
184
|
-
return;
|
|
185
|
-
}
|
|
186
|
-
|
|
187
170
|
// If we have a window function, use that
|
|
188
171
|
if (aggregate.window && UseWindowAPI()) {
|
|
189
172
|
// Frame boundaries
|
|
@@ -33,6 +33,26 @@ struct CountStarFunction : public BaseCountFunction {
|
|
|
33
33
|
static void ConstantOperation(STATE *state, AggregateInputData &, idx_t count) {
|
|
34
34
|
*state += count;
|
|
35
35
|
}
|
|
36
|
+
|
|
37
|
+
template <typename RESULT_TYPE>
|
|
38
|
+
static void Window(Vector inputs[], const ValidityMask &filter_mask, AggregateInputData &aggr_input_data,
|
|
39
|
+
idx_t input_count, data_ptr_t state, const FrameBounds &frame, const FrameBounds &prev,
|
|
40
|
+
Vector &result, idx_t rid, idx_t bias) {
|
|
41
|
+
D_ASSERT(input_count == 0);
|
|
42
|
+
auto data = FlatVector::GetData<RESULT_TYPE>(result);
|
|
43
|
+
const auto begin = frame.first;
|
|
44
|
+
const auto end = frame.second;
|
|
45
|
+
// Slice to any filtered rows
|
|
46
|
+
if (!filter_mask.AllValid()) {
|
|
47
|
+
RESULT_TYPE filtered = 0;
|
|
48
|
+
for (auto i = begin; i < end; ++i) {
|
|
49
|
+
filtered += filter_mask.RowIsValid(i);
|
|
50
|
+
}
|
|
51
|
+
data[rid] = filtered;
|
|
52
|
+
} else {
|
|
53
|
+
data[rid] = end - begin;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
36
56
|
};
|
|
37
57
|
|
|
38
58
|
struct CountFunction : public BaseCountFunction {
|
|
@@ -72,6 +92,7 @@ AggregateFunction CountStarFun::GetFunction() {
|
|
|
72
92
|
auto fun = AggregateFunction::NullaryAggregate<int64_t, int64_t, CountStarFunction>(LogicalType::BIGINT);
|
|
73
93
|
fun.name = "count_star";
|
|
74
94
|
fun.null_handling = FunctionNullHandling::SPECIAL_HANDLING;
|
|
95
|
+
fun.window = CountStarFunction::Window<int64_t>;
|
|
75
96
|
// TODO is there a better way to set those?
|
|
76
97
|
fun.serialize = CountStarSerialize;
|
|
77
98
|
fun.deserialize = CountStarDeserialize;
|
|
@@ -98,6 +119,7 @@ void CountFun::RegisterFunction(BuiltinFunctions &set) {
|
|
|
98
119
|
// the count function can also be called without arguments
|
|
99
120
|
count_function.arguments.clear();
|
|
100
121
|
count_function.statistics = nullptr;
|
|
122
|
+
count_function.window = CountStarFunction::Window<int64_t>;
|
|
101
123
|
count.AddFunction(count_function);
|
|
102
124
|
set.AddFunction(count);
|
|
103
125
|
}
|