duckdb 0.6.2-dev919.0 → 0.6.2-dev939.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +6 -12
- package/src/duckdb/extension/parquet/column_writer.cpp +6 -5
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -8
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -15
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -5
- package/src/duckdb/src/common/row_operations/row_gather.cpp +0 -1
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -1
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +0 -2
- package/src/duckdb/src/common/sort/sort_state.cpp +0 -2
- package/src/duckdb/src/common/types/value.cpp +6 -8
- package/src/duckdb/src/common/types/vector.cpp +40 -43
- package/src/duckdb/src/common/types.cpp +20 -25
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +0 -1
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +0 -2
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +0 -2
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +0 -1
- package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +0 -1
- package/src/duckdb/src/function/aggregate/nested/histogram.cpp +7 -24
- package/src/duckdb/src/function/cast/list_casts.cpp +3 -3
- package/src/duckdb/src/function/cast/map_cast.cpp +19 -60
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +0 -1
- package/src/duckdb/src/function/scalar/map/cardinality.cpp +1 -4
- package/src/duckdb/src/function/scalar/map/map.cpp +26 -38
- package/src/duckdb/src/function/scalar/map/map_extract.cpp +7 -6
- package/src/duckdb/src/function/scalar/map/map_from_entries.cpp +2 -117
- package/src/duckdb/src/function/table/arrow.cpp +6 -10
- package/src/duckdb/src/function/table/arrow_conversion.cpp +2 -55
- package/src/duckdb/src/function/table/system/test_all_types.cpp +14 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -3
- package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +4 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +3 -2
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +1 -8
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +11 -9
- package/src/duckdb/src/planner/expression_binder.cpp +6 -6
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
|
@@ -12,7 +12,7 @@ unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &i
|
|
|
12
12
|
return make_unique<ListBoundCastData>(move(child_cast));
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
bool ListCast::ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
|
|
16
16
|
auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
|
|
17
17
|
|
|
18
18
|
// only handle constant and flat vectors here for now
|
|
@@ -53,7 +53,7 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
|
|
|
53
53
|
auto constant = source.GetVectorType() == VectorType::CONSTANT_VECTOR;
|
|
54
54
|
// first cast the child vector to varchar
|
|
55
55
|
Vector varchar_list(LogicalType::LIST(LogicalType::VARCHAR), count);
|
|
56
|
-
ListToListCast(source, varchar_list, count, parameters);
|
|
56
|
+
ListCast::ListToListCast(source, varchar_list, count, parameters);
|
|
57
57
|
|
|
58
58
|
// now construct the actual varchar vector
|
|
59
59
|
varchar_list.Flatten(count);
|
|
@@ -116,7 +116,7 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
|
|
|
116
116
|
BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
117
117
|
switch (target.id()) {
|
|
118
118
|
case LogicalTypeId::LIST:
|
|
119
|
-
return BoundCastInfo(ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
|
|
119
|
+
return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
|
|
120
120
|
case LogicalTypeId::VARCHAR:
|
|
121
121
|
case LogicalTypeId::JSON:
|
|
122
122
|
return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
|
|
@@ -3,75 +3,28 @@
|
|
|
3
3
|
|
|
4
4
|
namespace duckdb {
|
|
5
5
|
|
|
6
|
-
struct MapBoundCastData : public BoundCastData {
|
|
7
|
-
MapBoundCastData(BoundCastInfo key_cast, BoundCastInfo value_cast)
|
|
8
|
-
: key_cast(move(key_cast)), value_cast(move(value_cast)) {
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
BoundCastInfo key_cast;
|
|
12
|
-
BoundCastInfo value_cast;
|
|
13
|
-
|
|
14
|
-
public:
|
|
15
|
-
unique_ptr<BoundCastData> Copy() const override {
|
|
16
|
-
return make_unique<MapBoundCastData>(key_cast.Copy(), value_cast.Copy());
|
|
17
|
-
}
|
|
18
|
-
};
|
|
19
|
-
|
|
20
|
-
unique_ptr<BoundCastData> BindMapToMapCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
21
|
-
vector<BoundCastInfo> child_cast_info;
|
|
22
|
-
auto source_key = LogicalType::LIST(MapType::KeyType(source));
|
|
23
|
-
auto target_key = LogicalType::LIST(MapType::KeyType(target));
|
|
24
|
-
auto source_val = LogicalType::LIST(MapType::ValueType(source));
|
|
25
|
-
auto target_val = LogicalType::LIST(MapType::ValueType(target));
|
|
26
|
-
auto key_cast = input.GetCastFunction(source_key, target_key);
|
|
27
|
-
auto value_cast = input.GetCastFunction(source_val, target_val);
|
|
28
|
-
return make_unique<MapBoundCastData>(move(key_cast), move(value_cast));
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
static bool MapToMapCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
|
|
32
|
-
auto &cast_data = (MapBoundCastData &)*parameters.cast_data;
|
|
33
|
-
CastParameters key_params(parameters, cast_data.key_cast.cast_data.get());
|
|
34
|
-
if (!cast_data.key_cast.function(MapVector::GetKeys(source), MapVector::GetKeys(result), count, key_params)) {
|
|
35
|
-
return false;
|
|
36
|
-
}
|
|
37
|
-
CastParameters val_params(parameters, cast_data.value_cast.cast_data.get());
|
|
38
|
-
if (!cast_data.value_cast.function(MapVector::GetValues(source), MapVector::GetValues(result), count, val_params)) {
|
|
39
|
-
return false;
|
|
40
|
-
}
|
|
41
|
-
if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
|
42
|
-
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
43
|
-
ConstantVector::SetNull(result, ConstantVector::IsNull(source));
|
|
44
|
-
} else {
|
|
45
|
-
source.Flatten(count);
|
|
46
|
-
FlatVector::Validity(result) = FlatVector::Validity(source);
|
|
47
|
-
}
|
|
48
|
-
return true;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
6
|
static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
|
|
52
7
|
auto constant = source.GetVectorType() == VectorType::CONSTANT_VECTOR;
|
|
53
|
-
// first cast the child elements to varchar
|
|
54
8
|
auto varchar_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
|
|
55
9
|
Vector varchar_map(varchar_type, count);
|
|
56
|
-
MapToMapCast(source, varchar_map, count, parameters);
|
|
57
10
|
|
|
58
|
-
//
|
|
59
|
-
varchar_map
|
|
11
|
+
// since map's physical type is a list, the ListCast can be utilized
|
|
12
|
+
ListCast::ListToListCast(source, varchar_map, count, parameters);
|
|
60
13
|
|
|
14
|
+
varchar_map.Flatten(count);
|
|
61
15
|
auto &validity = FlatVector::Validity(varchar_map);
|
|
62
|
-
auto &
|
|
63
|
-
auto &
|
|
64
|
-
auto &key_str = ListVector::GetEntry(key_lists);
|
|
65
|
-
auto &val_str = ListVector::GetEntry(val_lists);
|
|
16
|
+
auto &key_str = MapVector::GetKeys(varchar_map);
|
|
17
|
+
auto &val_str = MapVector::GetValues(varchar_map);
|
|
66
18
|
|
|
67
|
-
key_str.Flatten(ListVector::GetListSize(
|
|
68
|
-
val_str.Flatten(ListVector::GetListSize(
|
|
19
|
+
key_str.Flatten(ListVector::GetListSize(source));
|
|
20
|
+
val_str.Flatten(ListVector::GetListSize(source));
|
|
69
21
|
|
|
70
|
-
auto list_data =
|
|
22
|
+
auto list_data = ListVector::GetData(varchar_map);
|
|
71
23
|
auto key_data = FlatVector::GetData<string_t>(key_str);
|
|
72
24
|
auto val_data = FlatVector::GetData<string_t>(val_str);
|
|
73
25
|
auto &key_validity = FlatVector::Validity(key_str);
|
|
74
26
|
auto &val_validity = FlatVector::Validity(val_str);
|
|
27
|
+
auto &struct_validity = FlatVector::Validity(ListVector::GetEntry(varchar_map));
|
|
75
28
|
|
|
76
29
|
auto result_data = FlatVector::GetData<string_t>(result);
|
|
77
30
|
for (idx_t i = 0; i < count; i++) {
|
|
@@ -86,8 +39,15 @@ static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastPa
|
|
|
86
39
|
ret += ", ";
|
|
87
40
|
}
|
|
88
41
|
auto idx = list.offset + list_idx;
|
|
42
|
+
|
|
43
|
+
if (!struct_validity.RowIsValid(idx)) {
|
|
44
|
+
ret += "NULL";
|
|
45
|
+
continue;
|
|
46
|
+
}
|
|
89
47
|
if (!key_validity.RowIsValid(idx)) {
|
|
90
|
-
throw InternalException("Error in map: key validity invalid?!");
|
|
48
|
+
// throw InternalException("Error in map: key validity invalid?!");
|
|
49
|
+
ret += "invalid";
|
|
50
|
+
continue;
|
|
91
51
|
}
|
|
92
52
|
ret += key_data[idx].GetString();
|
|
93
53
|
ret += "=";
|
|
@@ -106,12 +66,11 @@ static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastPa
|
|
|
106
66
|
BoundCastInfo DefaultCasts::MapCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
107
67
|
switch (target.id()) {
|
|
108
68
|
case LogicalTypeId::MAP:
|
|
109
|
-
return BoundCastInfo(
|
|
69
|
+
return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
|
|
110
70
|
case LogicalTypeId::JSON:
|
|
111
71
|
case LogicalTypeId::VARCHAR: {
|
|
112
|
-
// bind a cast in which we convert the key/value to VARCHAR entries
|
|
113
72
|
auto varchar_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
|
|
114
|
-
return BoundCastInfo(MapToVarcharCast,
|
|
73
|
+
return BoundCastInfo(MapToVarcharCast, ListBoundCastData::BindListToListCast(input, source, varchar_type));
|
|
115
74
|
}
|
|
116
75
|
default:
|
|
117
76
|
return TryVectorNullCast;
|
|
@@ -140,7 +140,6 @@ static void ListContainsOrPosition(DataChunk &args, ExpressionState &state, Vect
|
|
|
140
140
|
case PhysicalType::VARCHAR:
|
|
141
141
|
TemplatedContainsOrPosition<string_t, T, OP>(args, state, result);
|
|
142
142
|
break;
|
|
143
|
-
case PhysicalType::MAP:
|
|
144
143
|
case PhysicalType::STRUCT:
|
|
145
144
|
case PhysicalType::LIST:
|
|
146
145
|
TemplatedContainsOrPosition<int8_t, T, OP>(args, state, result, true);
|
|
@@ -8,17 +8,14 @@ namespace duckdb {
|
|
|
8
8
|
|
|
9
9
|
static void CardinalityFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
10
10
|
auto &map = args.data[0];
|
|
11
|
-
UnifiedVectorFormat list_data;
|
|
12
11
|
UnifiedVectorFormat map_data;
|
|
13
12
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
14
13
|
auto result_data = FlatVector::GetData<uint64_t>(result);
|
|
15
14
|
auto &result_validity = FlatVector::Validity(result);
|
|
16
15
|
|
|
17
16
|
map.ToUnifiedFormat(args.size(), map_data);
|
|
18
|
-
auto &children = StructVector::GetEntries(map);
|
|
19
|
-
children[0]->ToUnifiedFormat(args.size(), list_data);
|
|
20
17
|
for (idx_t row = 0; row < args.size(); row++) {
|
|
21
|
-
auto list_entry = ((list_entry_t *)
|
|
18
|
+
auto list_entry = ((list_entry_t *)map_data.data)[map_data.sel->get_index(row)];
|
|
22
19
|
result_data[row] = list_entry.length;
|
|
23
20
|
result_validity.Set(row, map_data.validity.RowIsValid(map_data.sel->get_index(row)));
|
|
24
21
|
}
|
|
@@ -2,31 +2,25 @@
|
|
|
2
2
|
#include "duckdb/common/string_util.hpp"
|
|
3
3
|
#include "duckdb/parser/expression/bound_expression.hpp"
|
|
4
4
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
|
5
|
-
#include "duckdb/function/aggregate/nested_functions.hpp"
|
|
6
5
|
#include "duckdb/common/types/data_chunk.hpp"
|
|
7
6
|
#include "duckdb/common/pair.hpp"
|
|
8
7
|
#include "duckdb/common/types/value_map.hpp"
|
|
9
8
|
|
|
10
9
|
namespace duckdb {
|
|
11
10
|
|
|
12
|
-
// TODO: this doesn't recursively verify maps if maps are nested
|
|
13
11
|
MapInvalidReason CheckMapValidity(Vector &map, idx_t count, const SelectionVector &sel) {
|
|
14
12
|
D_ASSERT(map.GetType().id() == LogicalTypeId::MAP);
|
|
15
13
|
UnifiedVectorFormat map_vdata;
|
|
14
|
+
|
|
16
15
|
map.ToUnifiedFormat(count, map_vdata);
|
|
17
16
|
auto &map_validity = map_vdata.validity;
|
|
18
17
|
|
|
19
|
-
auto
|
|
18
|
+
auto list_data = ListVector::GetData(map);
|
|
19
|
+
auto &keys = MapVector::GetKeys(map);
|
|
20
20
|
UnifiedVectorFormat key_vdata;
|
|
21
|
-
|
|
22
|
-
auto key_data = (list_entry_t *)key_vdata.data;
|
|
21
|
+
keys.ToUnifiedFormat(count, key_vdata);
|
|
23
22
|
auto &key_validity = key_vdata.validity;
|
|
24
23
|
|
|
25
|
-
auto &key_entries = ListVector::GetEntry(key_vector);
|
|
26
|
-
UnifiedVectorFormat key_entry_vdata;
|
|
27
|
-
key_entries.ToUnifiedFormat(count, key_entry_vdata);
|
|
28
|
-
auto &entry_validity = key_entry_vdata.validity;
|
|
29
|
-
|
|
30
24
|
for (idx_t row = 0; row < count; row++) {
|
|
31
25
|
auto mapped_row = sel.get_index(row);
|
|
32
26
|
auto row_idx = map_vdata.sel->get_index(mapped_row);
|
|
@@ -35,17 +29,14 @@ MapInvalidReason CheckMapValidity(Vector &map, idx_t count, const SelectionVecto
|
|
|
35
29
|
continue;
|
|
36
30
|
}
|
|
37
31
|
row_idx = key_vdata.sel->get_index(row);
|
|
38
|
-
if (!key_validity.RowIsValid(row_idx)) {
|
|
39
|
-
return MapInvalidReason::NULL_KEY_LIST;
|
|
40
|
-
}
|
|
41
32
|
value_set_t unique_keys;
|
|
42
|
-
for (idx_t i = 0; i <
|
|
43
|
-
auto index =
|
|
44
|
-
index =
|
|
45
|
-
if (!
|
|
33
|
+
for (idx_t i = 0; i < list_data[row_idx].length; i++) {
|
|
34
|
+
auto index = list_data[row_idx].offset + i;
|
|
35
|
+
index = key_vdata.sel->get_index(index);
|
|
36
|
+
if (!key_validity.RowIsValid(index)) {
|
|
46
37
|
return MapInvalidReason::NULL_KEY;
|
|
47
38
|
}
|
|
48
|
-
auto value =
|
|
39
|
+
auto value = keys.GetValue(index);
|
|
49
40
|
auto result = unique_keys.insert(value);
|
|
50
41
|
if (!result.second) {
|
|
51
42
|
return MapInvalidReason::DUPLICATE_KEY;
|
|
@@ -86,28 +77,19 @@ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result)
|
|
|
86
77
|
}
|
|
87
78
|
}
|
|
88
79
|
|
|
89
|
-
auto &
|
|
90
|
-
|
|
91
|
-
auto
|
|
92
|
-
auto &value_vector = *child_entries[1];
|
|
93
|
-
if (args.data.empty()) {
|
|
94
|
-
// no arguments: construct an empty map
|
|
95
|
-
ListVector::SetListSize(key_vector, 0);
|
|
96
|
-
key_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
97
|
-
auto list_data = ConstantVector::GetData<list_entry_t>(key_vector);
|
|
98
|
-
list_data->offset = 0;
|
|
99
|
-
list_data->length = 0;
|
|
80
|
+
auto &key_vector = MapVector::GetKeys(result);
|
|
81
|
+
auto &value_vector = MapVector::GetValues(result);
|
|
82
|
+
auto list_data = ListVector::GetData(result);
|
|
100
83
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
list_data = ConstantVector::GetData<list_entry_t>(value_vector);
|
|
84
|
+
if (args.data.empty()) {
|
|
85
|
+
ListVector::SetListSize(result, 0);
|
|
104
86
|
list_data->offset = 0;
|
|
105
87
|
list_data->length = 0;
|
|
106
|
-
|
|
107
88
|
result.Verify(args.size());
|
|
108
89
|
return;
|
|
109
90
|
}
|
|
110
91
|
|
|
92
|
+
auto args_data = ListVector::GetData(args.data[0]);
|
|
111
93
|
auto key_count = ListVector::GetListSize(args.data[0]);
|
|
112
94
|
auto value_count = ListVector::GetListSize(args.data[1]);
|
|
113
95
|
if (key_count != value_count) {
|
|
@@ -115,11 +97,16 @@ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result)
|
|
|
115
97
|
"Error in MAP creation: key list has a different size from value list (%lld keys, %lld values)", key_count,
|
|
116
98
|
value_count);
|
|
117
99
|
}
|
|
100
|
+
ListVector::Reserve(result, key_count);
|
|
101
|
+
ListVector::SetListSize(result, key_count);
|
|
118
102
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
103
|
+
for (idx_t i = 0; i < args.size(); i++) {
|
|
104
|
+
list_data[i] = args_data[i];
|
|
105
|
+
}
|
|
122
106
|
|
|
107
|
+
key_vector.Reference(ListVector::GetEntry(args.data[0]));
|
|
108
|
+
value_vector.Reference(ListVector::GetEntry(args.data[1]));
|
|
109
|
+
MapConversionVerify(result, args.size());
|
|
123
110
|
result.Verify(args.size());
|
|
124
111
|
}
|
|
125
112
|
|
|
@@ -147,8 +134,9 @@ static unique_ptr<FunctionData> MapBind(ClientContext &context, ScalarFunction &
|
|
|
147
134
|
child_types.push_back(make_pair("value", empty));
|
|
148
135
|
}
|
|
149
136
|
|
|
150
|
-
|
|
151
|
-
|
|
137
|
+
bound_function.return_type =
|
|
138
|
+
LogicalType::MAP(ListType::GetChildType(child_types[0].second), ListType::GetChildType(child_types[1].second));
|
|
139
|
+
|
|
152
140
|
return make_unique<VariableReturnBindData>(bound_function.return_type);
|
|
153
141
|
}
|
|
154
142
|
|
|
@@ -27,7 +27,6 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
|
|
|
27
27
|
|
|
28
28
|
if (args.data[1].GetType().id() == LogicalTypeId::SQLNULL) {
|
|
29
29
|
//! We don't need to look through the map if the 'key' to look for is NULL
|
|
30
|
-
//! Because maps can't have NULL as key
|
|
31
30
|
ListVector::SetListSize(result, 0);
|
|
32
31
|
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
33
32
|
auto list_data = ConstantVector::GetData<list_entry_t>(result);
|
|
@@ -40,21 +39,23 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
|
|
|
40
39
|
auto &map = args.data[0];
|
|
41
40
|
auto &key = args.data[1];
|
|
42
41
|
|
|
43
|
-
UnifiedVectorFormat
|
|
42
|
+
UnifiedVectorFormat map_data;
|
|
44
43
|
UnifiedVectorFormat key_data;
|
|
45
44
|
|
|
46
45
|
auto &map_keys = MapVector::GetKeys(map);
|
|
47
46
|
auto &map_values = MapVector::GetValues(map);
|
|
48
47
|
|
|
49
|
-
|
|
48
|
+
map.ToUnifiedFormat(args.size(), map_data);
|
|
50
49
|
key.ToUnifiedFormat(args.size(), key_data);
|
|
51
50
|
|
|
52
51
|
for (idx_t row = 0; row < args.size(); row++) {
|
|
53
|
-
idx_t row_index =
|
|
52
|
+
idx_t row_index = map_data.sel->get_index(row);
|
|
54
53
|
idx_t key_index = key_data.sel->get_index(row);
|
|
55
54
|
auto key_value = key.GetValue(key_index);
|
|
56
|
-
|
|
57
|
-
|
|
55
|
+
|
|
56
|
+
list_entry_t entry = ListVector::GetData(map)[row_index];
|
|
57
|
+
auto offsets = MapVector::Search(map_keys, args.size(), key_value, entry);
|
|
58
|
+
auto values = FlatVector::GetValuesFromOffsets(map_values, offsets);
|
|
58
59
|
FillResult(values, result, row);
|
|
59
60
|
}
|
|
60
61
|
|
|
@@ -3,124 +3,14 @@
|
|
|
3
3
|
#include "duckdb/parser/expression/bound_expression.hpp"
|
|
4
4
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
|
5
5
|
#include "duckdb/common/types/data_chunk.hpp"
|
|
6
|
-
#include "duckdb/common/pair.hpp"
|
|
7
6
|
|
|
8
7
|
namespace duckdb {
|
|
9
8
|
|
|
10
|
-
struct VectorInfo {
|
|
11
|
-
Vector &container;
|
|
12
|
-
list_entry_t &data;
|
|
13
|
-
};
|
|
14
|
-
|
|
15
|
-
static void MapStruct(Value &element, VectorInfo &keys, VectorInfo &values) {
|
|
16
|
-
D_ASSERT(element.type().id() == LogicalTypeId::STRUCT);
|
|
17
|
-
D_ASSERT(!element.IsNull());
|
|
18
|
-
auto &key_value = StructValue::GetChildren(element);
|
|
19
|
-
auto &key = key_value[0];
|
|
20
|
-
auto &value = key_value[1];
|
|
21
|
-
|
|
22
|
-
if (key.IsNull()) {
|
|
23
|
-
throw InvalidInputException("None of the keys of the map can be NULL");
|
|
24
|
-
}
|
|
25
|
-
// Add to the inner key/value lists of the resulting map
|
|
26
|
-
ListVector::PushBack(keys.container, key);
|
|
27
|
-
ListVector::PushBack(values.container, value);
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
// FIXME: this operation has a time complexity of O(n^2)
|
|
31
|
-
void CheckKeyUniqueness(VectorInfo &keys) {
|
|
32
|
-
auto end = keys.data.offset + keys.data.length;
|
|
33
|
-
auto &entries = ListVector::GetEntry(keys.container);
|
|
34
|
-
for (auto lhs = keys.data.offset; lhs < end; lhs++) {
|
|
35
|
-
auto element = entries.GetValue(lhs);
|
|
36
|
-
D_ASSERT(!element.IsNull());
|
|
37
|
-
for (auto rhs = lhs + 1; rhs < end; rhs++) {
|
|
38
|
-
auto other = entries.GetValue(rhs);
|
|
39
|
-
D_ASSERT(!other.IsNull());
|
|
40
|
-
|
|
41
|
-
if (element.type() != other.type()) {
|
|
42
|
-
throw InvalidInputException("Not all keys are of the same type!");
|
|
43
|
-
}
|
|
44
|
-
if (element == other) {
|
|
45
|
-
throw InvalidInputException("The given keys aren't unique");
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
static bool MapSingleList(VectorInfo &input, VectorInfo &keys, VectorInfo &values) {
|
|
52
|
-
// Get the length and offset of this list from the argument data
|
|
53
|
-
auto pair_amount = input.data.length;
|
|
54
|
-
auto input_offset = input.data.offset;
|
|
55
|
-
|
|
56
|
-
// Loop over the list of structs
|
|
57
|
-
idx_t inserted_values = 0;
|
|
58
|
-
for (idx_t i = 0; i < pair_amount; i++) {
|
|
59
|
-
auto index = i + input_offset;
|
|
60
|
-
// Get the struct using the offset and the index;
|
|
61
|
-
auto element = input.container.GetValue(index);
|
|
62
|
-
if (element.IsNull()) {
|
|
63
|
-
continue;
|
|
64
|
-
}
|
|
65
|
-
MapStruct(element, keys, values);
|
|
66
|
-
inserted_values++;
|
|
67
|
-
}
|
|
68
|
-
// Set the length of the key value lists
|
|
69
|
-
keys.data.length = inserted_values;
|
|
70
|
-
values.data.length = inserted_values;
|
|
71
|
-
return inserted_values != 0;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
9
|
static void MapFromEntriesFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
75
|
-
D_ASSERT(result.GetType().id() == LogicalTypeId::MAP);
|
|
76
|
-
|
|
77
|
-
result.SetVectorType(duckdb::VectorType::FLAT_VECTOR);
|
|
78
|
-
|
|
79
|
-
// Get the arguments vector
|
|
80
|
-
auto &input_list = args.data[0];
|
|
81
|
-
auto arg_data = FlatVector::GetData<list_entry_t>(input_list);
|
|
82
|
-
auto &entries = ListVector::GetEntry(input_list);
|
|
83
|
-
|
|
84
|
-
// Prepare the result vectors
|
|
85
|
-
auto &child_entries = StructVector::GetEntries(result);
|
|
86
|
-
D_ASSERT(child_entries.size() == 2);
|
|
87
|
-
auto &key_vector = *child_entries[0];
|
|
88
|
-
auto &value_vector = *child_entries[1];
|
|
89
|
-
auto &result_validity = FlatVector::Validity(result);
|
|
90
|
-
|
|
91
|
-
// Get the offset+length data for the list(s)
|
|
92
|
-
auto key_data = FlatVector::GetData<list_entry_t>(key_vector);
|
|
93
|
-
auto value_data = FlatVector::GetData<list_entry_t>(value_vector);
|
|
94
|
-
|
|
95
|
-
auto &key_validity = FlatVector::Validity(key_vector);
|
|
96
|
-
auto &value_validity = FlatVector::Validity(value_vector);
|
|
97
|
-
|
|
98
10
|
auto count = args.size();
|
|
99
11
|
|
|
100
|
-
|
|
101
|
-
input_list.ToUnifiedFormat(count, input_list_data);
|
|
102
|
-
|
|
103
|
-
// Current offset into the keys/values list
|
|
104
|
-
idx_t offset = 0;
|
|
105
|
-
|
|
106
|
-
// Transform to mapped values
|
|
107
|
-
for (idx_t i = 0; i < count; i++) {
|
|
108
|
-
VectorInfo input {entries, arg_data[i]};
|
|
109
|
-
VectorInfo keys {key_vector, key_data[i]};
|
|
110
|
-
VectorInfo values {value_vector, value_data[i]};
|
|
12
|
+
result.Reinterpret(args.data[0]);
|
|
111
13
|
|
|
112
|
-
keys.data.offset = offset;
|
|
113
|
-
values.data.offset = offset;
|
|
114
|
-
auto row_valid = MapSingleList(input, keys, values);
|
|
115
|
-
offset += keys.data.length;
|
|
116
|
-
|
|
117
|
-
// Check validity
|
|
118
|
-
if (!row_valid || !input_list_data.validity.RowIsValid(i)) {
|
|
119
|
-
key_validity.SetInvalid(i);
|
|
120
|
-
value_validity.SetInvalid(i);
|
|
121
|
-
result_validity.SetInvalid(i);
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
14
|
MapConversionVerify(result, count);
|
|
125
15
|
result.Verify(count);
|
|
126
16
|
|
|
@@ -131,8 +21,6 @@ static void MapFromEntriesFunction(DataChunk &args, ExpressionState &state, Vect
|
|
|
131
21
|
|
|
132
22
|
static unique_ptr<FunctionData> MapFromEntriesBind(ClientContext &context, ScalarFunction &bound_function,
|
|
133
23
|
vector<unique_ptr<Expression>> &arguments) {
|
|
134
|
-
child_list_t<LogicalType> child_types;
|
|
135
|
-
|
|
136
24
|
if (arguments.size() != 1) {
|
|
137
25
|
throw InvalidInputException("The input argument must be a list of structs.");
|
|
138
26
|
}
|
|
@@ -155,11 +43,8 @@ static unique_ptr<FunctionData> MapFromEntriesBind(ClientContext &context, Scala
|
|
|
155
43
|
if (children.size() != 2) {
|
|
156
44
|
throw InvalidInputException("The provided struct type should only contain 2 fields, a key and a value");
|
|
157
45
|
}
|
|
158
|
-
child_types.push_back(make_pair("key", LogicalType::LIST(children[0].second)));
|
|
159
|
-
child_types.push_back(make_pair("value", LogicalType::LIST(children[1].second)));
|
|
160
46
|
|
|
161
|
-
|
|
162
|
-
bound_function.return_type = LogicalType::MAP(move(child_types));
|
|
47
|
+
bound_function.return_type = LogicalType::MAP(elem_type);
|
|
163
48
|
return make_unique<VariableReturnBindData>(bound_function.return_type);
|
|
164
49
|
}
|
|
165
50
|
|
|
@@ -124,17 +124,13 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
|
|
|
124
124
|
return LogicalType::STRUCT(move(child_types));
|
|
125
125
|
|
|
126
126
|
} else if (format == "+m") {
|
|
127
|
-
|
|
128
|
-
//! First type will be struct, so we skip it
|
|
129
|
-
auto &struct_schema = *schema.children[0];
|
|
130
|
-
for (idx_t type_idx = 0; type_idx < (idx_t)struct_schema.n_children; type_idx++) {
|
|
131
|
-
//! The other types must be added on lists
|
|
132
|
-
auto child_type = GetArrowLogicalType(*struct_schema.children[type_idx], arrow_convert_data, col_idx);
|
|
127
|
+
arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
|
|
133
128
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
129
|
+
auto &arrow_struct_type = *schema.children[0];
|
|
130
|
+
D_ASSERT(arrow_struct_type.n_children == 2);
|
|
131
|
+
auto key_type = GetArrowLogicalType(*arrow_struct_type.children[0], arrow_convert_data, col_idx);
|
|
132
|
+
auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1], arrow_convert_data, col_idx);
|
|
133
|
+
return LogicalType::MAP(key_type, value_type);
|
|
138
134
|
} else if (format == "z") {
|
|
139
135
|
arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
|
|
140
136
|
return LogicalType::BLOB;
|
|
@@ -227,47 +227,6 @@ void ArrowToDuckDBMapVerify(Vector &vector, idx_t count) {
|
|
|
227
227
|
}
|
|
228
228
|
}
|
|
229
229
|
|
|
230
|
-
void ArrowToDuckDBMapList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
|
231
|
-
unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
|
|
232
|
-
pair<idx_t, idx_t> &arrow_convert_idx, uint32_t *offsets, ValidityMask *parent_mask) {
|
|
233
|
-
idx_t list_size = offsets[size] - offsets[0];
|
|
234
|
-
ListVector::Reserve(vector, list_size);
|
|
235
|
-
|
|
236
|
-
auto &child_vector = ListVector::GetEntry(vector);
|
|
237
|
-
auto list_data = FlatVector::GetData<list_entry_t>(vector);
|
|
238
|
-
auto cur_offset = 0;
|
|
239
|
-
for (idx_t i = 0; i < size; i++) {
|
|
240
|
-
auto &le = list_data[i];
|
|
241
|
-
le.offset = cur_offset;
|
|
242
|
-
le.length = offsets[i + 1] - offsets[i];
|
|
243
|
-
cur_offset += le.length;
|
|
244
|
-
}
|
|
245
|
-
ListVector::SetListSize(vector, list_size);
|
|
246
|
-
if (list_size == 0 && offsets[0] == 0) {
|
|
247
|
-
SetValidityMask(child_vector, array, scan_state, list_size, -1);
|
|
248
|
-
} else {
|
|
249
|
-
SetValidityMask(child_vector, array, scan_state, list_size, offsets[0]);
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
auto &list_mask = FlatVector::Validity(vector);
|
|
253
|
-
if (parent_mask) {
|
|
254
|
-
//! Since this List is owned by a struct we must guarantee their validity map matches on Null
|
|
255
|
-
if (!parent_mask->AllValid()) {
|
|
256
|
-
for (idx_t i = 0; i < size; i++) {
|
|
257
|
-
if (!parent_mask->RowIsValid(i)) {
|
|
258
|
-
list_mask.SetInvalid(i);
|
|
259
|
-
}
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
}
|
|
263
|
-
if (list_size == 0 && offsets[0] == 0) {
|
|
264
|
-
ColumnArrowToDuckDB(child_vector, array, scan_state, list_size, arrow_convert_data, col_idx, arrow_convert_idx,
|
|
265
|
-
-1);
|
|
266
|
-
} else {
|
|
267
|
-
ColumnArrowToDuckDB(child_vector, array, scan_state, list_size, arrow_convert_data, col_idx, arrow_convert_idx,
|
|
268
|
-
offsets[0]);
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
230
|
template <class T>
|
|
272
231
|
static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets) {
|
|
273
232
|
auto strings = FlatVector::GetData<string_t>(vector);
|
|
@@ -619,20 +578,8 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
|
619
578
|
break;
|
|
620
579
|
}
|
|
621
580
|
case LogicalTypeId::MAP: {
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
auto &child_entries = StructVector::GetEntries(vector);
|
|
625
|
-
D_ASSERT(child_entries.size() == 2);
|
|
626
|
-
auto offsets = (uint32_t *)array.buffers[1] + array.offset + scan_state.chunk_offset;
|
|
627
|
-
if (nested_offset != -1) {
|
|
628
|
-
offsets = (uint32_t *)array.buffers[1] + nested_offset;
|
|
629
|
-
}
|
|
630
|
-
auto &struct_validity_mask = FlatVector::Validity(vector);
|
|
631
|
-
//! Fill the children
|
|
632
|
-
for (idx_t type_idx = 0; type_idx < (idx_t)struct_arrow.n_children; type_idx++) {
|
|
633
|
-
ArrowToDuckDBMapList(*child_entries[type_idx], *struct_arrow.children[type_idx], scan_state, size,
|
|
634
|
-
arrow_convert_data, col_idx, arrow_convert_idx, offsets, &struct_validity_mask);
|
|
635
|
-
}
|
|
581
|
+
ArrowToDuckDBList(vector, array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx,
|
|
582
|
+
nested_offset, parent_mask);
|
|
636
583
|
ArrowToDuckDBMapVerify(vector, size);
|
|
637
584
|
break;
|
|
638
585
|
}
|
|
@@ -177,9 +177,20 @@ vector<TestType> TestAllTypesFun::GetTestTypes() {
|
|
|
177
177
|
|
|
178
178
|
// map
|
|
179
179
|
auto map_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
|
|
180
|
-
auto min_map_value = Value::MAP(
|
|
181
|
-
|
|
182
|
-
|
|
180
|
+
auto min_map_value = Value::MAP(ListType::GetChildType(map_type), std::vector<Value>());
|
|
181
|
+
|
|
182
|
+
child_list_t<Value> map_struct1;
|
|
183
|
+
map_struct1.push_back(make_pair("key", Value("key1")));
|
|
184
|
+
map_struct1.push_back(make_pair("value", Value("🦆🦆🦆🦆🦆🦆")));
|
|
185
|
+
child_list_t<Value> map_struct2;
|
|
186
|
+
map_struct2.push_back(make_pair("key", Value("key2")));
|
|
187
|
+
map_struct2.push_back(make_pair("key", Value("goose")));
|
|
188
|
+
|
|
189
|
+
std::vector<Value> map_values;
|
|
190
|
+
map_values.push_back(Value::STRUCT(map_struct1));
|
|
191
|
+
map_values.push_back(Value::STRUCT(map_struct2));
|
|
192
|
+
|
|
193
|
+
auto max_map_value = Value::MAP(ListType::GetChildType(map_type), map_values);
|
|
183
194
|
result.emplace_back(map_type, "map", move(min_map_value), move(max_map_value));
|
|
184
195
|
|
|
185
196
|
return result;
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
#ifndef DUCKDB_VERSION
|
|
2
|
-
#define DUCKDB_VERSION "0.6.2-dev919"
|
|
2
|
+
#define DUCKDB_VERSION "0.6.2-dev939"
|
|
3
3
|
#endif
|
|
4
4
|
#ifndef DUCKDB_SOURCE_ID
|
|
5
|
-
#define DUCKDB_SOURCE_ID "
|
|
5
|
+
#define DUCKDB_SOURCE_ID "db2bb06ef5"
|
|
6
6
|
#endif
|
|
7
7
|
#include "duckdb/function/table/system_functions.hpp"
|
|
8
8
|
#include "duckdb/main/database.hpp"
|
|
@@ -157,8 +157,8 @@ public:
|
|
|
157
157
|
DUCKDB_API static Value LIST(LogicalType child_type, vector<Value> values);
|
|
158
158
|
//! Create an empty list with the specified child-type
|
|
159
159
|
DUCKDB_API static Value EMPTYLIST(LogicalType child_type);
|
|
160
|
-
//! Create a map value
|
|
161
|
-
DUCKDB_API static Value MAP(
|
|
160
|
+
//! Create a map value with the given entries
|
|
161
|
+
DUCKDB_API static Value MAP(LogicalType child_type, vector<Value> values);
|
|
162
162
|
//! Create a union value from a selected value and a tag from a set of alternatives.
|
|
163
163
|
DUCKDB_API static Value UNION(child_list_t<LogicalType> members, uint8_t tag, Value value);
|
|
164
164
|
|
|
@@ -304,6 +304,7 @@ struct FlatVector {
|
|
|
304
304
|
return !vector.validity.RowIsValid(idx);
|
|
305
305
|
}
|
|
306
306
|
DUCKDB_API static const SelectionVector *IncrementalSelectionVector();
|
|
307
|
+
static Value GetValuesFromOffsets(Vector &values, vector<idx_t> &offsets);
|
|
307
308
|
};
|
|
308
309
|
|
|
309
310
|
struct ListVector {
|
|
@@ -330,8 +331,6 @@ struct ListVector {
|
|
|
330
331
|
DUCKDB_API static void Append(Vector &target, const Vector &source, const SelectionVector &sel, idx_t source_size,
|
|
331
332
|
idx_t source_offset = 0);
|
|
332
333
|
DUCKDB_API static void PushBack(Vector &target, const Value &insert);
|
|
333
|
-
DUCKDB_API static vector<idx_t> Search(Vector &list, const Value &key, idx_t row);
|
|
334
|
-
DUCKDB_API static Value GetValuesFromOffsets(Vector &list, vector<idx_t> &offsets);
|
|
335
334
|
//! Share the entry of the other list vector
|
|
336
335
|
DUCKDB_API static void ReferenceEntry(Vector &vector, Vector &other);
|
|
337
336
|
};
|
|
@@ -409,6 +408,7 @@ struct MapVector {
|
|
|
409
408
|
DUCKDB_API static const Vector &GetValues(const Vector &vector);
|
|
410
409
|
DUCKDB_API static Vector &GetKeys(Vector &vector);
|
|
411
410
|
DUCKDB_API static Vector &GetValues(Vector &vector);
|
|
411
|
+
static vector<idx_t> Search(Vector &keys, idx_t count, const Value &key, list_entry_t &entry);
|
|
412
412
|
};
|
|
413
413
|
|
|
414
414
|
struct StructVector {
|