duckdb 0.6.2-dev919.0 → 0.6.2-dev942.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +6 -12
- package/src/duckdb/extension/parquet/column_writer.cpp +6 -5
- package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -8
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -15
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -5
- package/src/duckdb/src/common/row_operations/row_gather.cpp +0 -1
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -1
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +0 -2
- package/src/duckdb/src/common/sort/sort_state.cpp +0 -2
- package/src/duckdb/src/common/types/value.cpp +6 -8
- package/src/duckdb/src/common/types/vector.cpp +40 -43
- package/src/duckdb/src/common/types.cpp +20 -25
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +0 -1
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +0 -2
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +0 -2
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +0 -1
- package/src/duckdb/src/execution/window_segment_tree.cpp +0 -17
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +22 -0
- package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +0 -1
- package/src/duckdb/src/function/aggregate/nested/histogram.cpp +7 -24
- package/src/duckdb/src/function/cast/list_casts.cpp +3 -3
- package/src/duckdb/src/function/cast/map_cast.cpp +19 -60
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +0 -1
- package/src/duckdb/src/function/scalar/map/cardinality.cpp +1 -4
- package/src/duckdb/src/function/scalar/map/map.cpp +26 -38
- package/src/duckdb/src/function/scalar/map/map_extract.cpp +7 -6
- package/src/duckdb/src/function/scalar/map/map_from_entries.cpp +2 -117
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +1 -1
- package/src/duckdb/src/function/table/arrow.cpp +6 -10
- package/src/duckdb/src/function/table/arrow_conversion.cpp +2 -55
- package/src/duckdb/src/function/table/system/test_all_types.cpp +14 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/types.hpp +1 -3
- package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +4 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +3 -2
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +1 -8
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +11 -9
- package/src/duckdb/src/planner/expression_binder.cpp +6 -6
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
|
@@ -268,7 +268,6 @@ static bool TemplatedOptimumValue(Vector &left, idx_t lidx, idx_t lcount, Vector
|
|
|
268
268
|
return TemplatedOptimumType<string_t, OP>(left, lidx, lcount, right, ridx, rcount);
|
|
269
269
|
case PhysicalType::LIST:
|
|
270
270
|
return TemplatedOptimumList<OP>(left, lidx, lcount, right, ridx, rcount);
|
|
271
|
-
case PhysicalType::MAP:
|
|
272
271
|
case PhysicalType::STRUCT:
|
|
273
272
|
return TemplatedOptimumStruct<OP>(left, lidx, lcount, right, ridx, rcount);
|
|
274
273
|
default:
|
|
@@ -121,40 +121,26 @@ static void HistogramFinalizeFunction(Vector &state_vector, AggregateInputData &
|
|
|
121
121
|
auto states = (HistogramAggState<T, MAP_TYPE> **)sdata.data;
|
|
122
122
|
|
|
123
123
|
auto &mask = FlatVector::Validity(result);
|
|
124
|
-
|
|
125
|
-
auto &child_entries = StructVector::GetEntries(result);
|
|
126
|
-
auto &bucket_list = child_entries[0];
|
|
127
|
-
auto &count_list = child_entries[1];
|
|
128
|
-
|
|
129
|
-
auto old_len = ListVector::GetListSize(*bucket_list);
|
|
130
|
-
|
|
131
|
-
auto &bucket_validity = FlatVector::Validity(*bucket_list);
|
|
132
|
-
auto &count_validity = FlatVector::Validity(*count_list);
|
|
124
|
+
auto old_len = ListVector::GetListSize(result);
|
|
133
125
|
|
|
134
126
|
for (idx_t i = 0; i < count; i++) {
|
|
135
|
-
|
|
136
127
|
const auto rid = i + offset;
|
|
137
128
|
auto state = states[sdata.sel->get_index(i)];
|
|
138
129
|
if (!state->hist) {
|
|
139
130
|
mask.SetInvalid(rid);
|
|
140
|
-
bucket_validity.SetInvalid(rid);
|
|
141
|
-
count_validity.SetInvalid(rid);
|
|
142
131
|
continue;
|
|
143
132
|
}
|
|
144
133
|
|
|
145
134
|
for (auto &entry : *state->hist) {
|
|
146
135
|
Value bucket_value = OP::template HistogramFinalize<T>(entry.first);
|
|
147
|
-
ListVector::PushBack(*bucket_list, bucket_value);
|
|
148
136
|
auto count_value = Value::CreateValue(entry.second);
|
|
149
|
-
|
|
137
|
+
auto struct_value =
|
|
138
|
+
Value::STRUCT({std::make_pair("key", bucket_value), std::make_pair("value", count_value)});
|
|
139
|
+
ListVector::PushBack(result, struct_value);
|
|
150
140
|
}
|
|
151
141
|
|
|
152
|
-
auto list_struct_data =
|
|
153
|
-
list_struct_data[rid].length = ListVector::GetListSize(
|
|
154
|
-
list_struct_data[rid].offset = old_len;
|
|
155
|
-
|
|
156
|
-
list_struct_data = FlatVector::GetData<list_entry_t>(*count_list);
|
|
157
|
-
list_struct_data[rid].length = ListVector::GetListSize(*count_list) - old_len;
|
|
142
|
+
auto list_struct_data = ListVector::GetData(result);
|
|
143
|
+
list_struct_data[rid].length = ListVector::GetListSize(result) - old_len;
|
|
158
144
|
list_struct_data[rid].offset = old_len;
|
|
159
145
|
old_len += list_struct_data[rid].length;
|
|
160
146
|
}
|
|
@@ -171,10 +157,7 @@ unique_ptr<FunctionData> HistogramBindFunction(ClientContext &context, Aggregate
|
|
|
171
157
|
throw NotImplementedException("Unimplemented type for histogram %s", arguments[0]->return_type.ToString());
|
|
172
158
|
}
|
|
173
159
|
|
|
174
|
-
|
|
175
|
-
struct_children.push_back({"key", LogicalType::LIST(arguments[0]->return_type)});
|
|
176
|
-
struct_children.push_back({"value", LogicalType::LIST(LogicalType::UBIGINT)});
|
|
177
|
-
auto struct_type = LogicalType::MAP(move(struct_children));
|
|
160
|
+
auto struct_type = LogicalType::MAP(arguments[0]->return_type, LogicalType::UBIGINT);
|
|
178
161
|
|
|
179
162
|
function.return_type = struct_type;
|
|
180
163
|
return make_unique<VariableReturnBindData>(function.return_type);
|
|
@@ -12,7 +12,7 @@ unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &i
|
|
|
12
12
|
return make_unique<ListBoundCastData>(move(child_cast));
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
bool ListCast::ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) {
|
|
16
16
|
auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
|
|
17
17
|
|
|
18
18
|
// only handle constant and flat vectors here for now
|
|
@@ -53,7 +53,7 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
|
|
|
53
53
|
auto constant = source.GetVectorType() == VectorType::CONSTANT_VECTOR;
|
|
54
54
|
// first cast the child vector to varchar
|
|
55
55
|
Vector varchar_list(LogicalType::LIST(LogicalType::VARCHAR), count);
|
|
56
|
-
ListToListCast(source, varchar_list, count, parameters);
|
|
56
|
+
ListCast::ListToListCast(source, varchar_list, count, parameters);
|
|
57
57
|
|
|
58
58
|
// now construct the actual varchar vector
|
|
59
59
|
varchar_list.Flatten(count);
|
|
@@ -116,7 +116,7 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
|
|
|
116
116
|
BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
117
117
|
switch (target.id()) {
|
|
118
118
|
case LogicalTypeId::LIST:
|
|
119
|
-
return BoundCastInfo(ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
|
|
119
|
+
return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
|
|
120
120
|
case LogicalTypeId::VARCHAR:
|
|
121
121
|
case LogicalTypeId::JSON:
|
|
122
122
|
return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
|
|
@@ -3,75 +3,28 @@
|
|
|
3
3
|
|
|
4
4
|
namespace duckdb {
|
|
5
5
|
|
|
6
|
-
struct MapBoundCastData : public BoundCastData {
|
|
7
|
-
MapBoundCastData(BoundCastInfo key_cast, BoundCastInfo value_cast)
|
|
8
|
-
: key_cast(move(key_cast)), value_cast(move(value_cast)) {
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
BoundCastInfo key_cast;
|
|
12
|
-
BoundCastInfo value_cast;
|
|
13
|
-
|
|
14
|
-
public:
|
|
15
|
-
unique_ptr<BoundCastData> Copy() const override {
|
|
16
|
-
return make_unique<MapBoundCastData>(key_cast.Copy(), value_cast.Copy());
|
|
17
|
-
}
|
|
18
|
-
};
|
|
19
|
-
|
|
20
|
-
unique_ptr<BoundCastData> BindMapToMapCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
21
|
-
vector<BoundCastInfo> child_cast_info;
|
|
22
|
-
auto source_key = LogicalType::LIST(MapType::KeyType(source));
|
|
23
|
-
auto target_key = LogicalType::LIST(MapType::KeyType(target));
|
|
24
|
-
auto source_val = LogicalType::LIST(MapType::ValueType(source));
|
|
25
|
-
auto target_val = LogicalType::LIST(MapType::ValueType(target));
|
|
26
|
-
auto key_cast = input.GetCastFunction(source_key, target_key);
|
|
27
|
-
auto value_cast = input.GetCastFunction(source_val, target_val);
|
|
28
|
-
return make_unique<MapBoundCastData>(move(key_cast), move(value_cast));
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
static bool MapToMapCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) {
|
|
32
|
-
auto &cast_data = (MapBoundCastData &)*parameters.cast_data;
|
|
33
|
-
CastParameters key_params(parameters, cast_data.key_cast.cast_data.get());
|
|
34
|
-
if (!cast_data.key_cast.function(MapVector::GetKeys(source), MapVector::GetKeys(result), count, key_params)) {
|
|
35
|
-
return false;
|
|
36
|
-
}
|
|
37
|
-
CastParameters val_params(parameters, cast_data.value_cast.cast_data.get());
|
|
38
|
-
if (!cast_data.value_cast.function(MapVector::GetValues(source), MapVector::GetValues(result), count, val_params)) {
|
|
39
|
-
return false;
|
|
40
|
-
}
|
|
41
|
-
if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
|
42
|
-
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
43
|
-
ConstantVector::SetNull(result, ConstantVector::IsNull(source));
|
|
44
|
-
} else {
|
|
45
|
-
source.Flatten(count);
|
|
46
|
-
FlatVector::Validity(result) = FlatVector::Validity(source);
|
|
47
|
-
}
|
|
48
|
-
return true;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
6
|
static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters ¶meters) {
|
|
52
7
|
auto constant = source.GetVectorType() == VectorType::CONSTANT_VECTOR;
|
|
53
|
-
// first cast the child elements to varchar
|
|
54
8
|
auto varchar_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
|
|
55
9
|
Vector varchar_map(varchar_type, count);
|
|
56
|
-
MapToMapCast(source, varchar_map, count, parameters);
|
|
57
10
|
|
|
58
|
-
//
|
|
59
|
-
varchar_map
|
|
11
|
+
// since map's physical type is a list, the ListCast can be utilized
|
|
12
|
+
ListCast::ListToListCast(source, varchar_map, count, parameters);
|
|
60
13
|
|
|
14
|
+
varchar_map.Flatten(count);
|
|
61
15
|
auto &validity = FlatVector::Validity(varchar_map);
|
|
62
|
-
auto &
|
|
63
|
-
auto &
|
|
64
|
-
auto &key_str = ListVector::GetEntry(key_lists);
|
|
65
|
-
auto &val_str = ListVector::GetEntry(val_lists);
|
|
16
|
+
auto &key_str = MapVector::GetKeys(varchar_map);
|
|
17
|
+
auto &val_str = MapVector::GetValues(varchar_map);
|
|
66
18
|
|
|
67
|
-
key_str.Flatten(ListVector::GetListSize(
|
|
68
|
-
val_str.Flatten(ListVector::GetListSize(
|
|
19
|
+
key_str.Flatten(ListVector::GetListSize(source));
|
|
20
|
+
val_str.Flatten(ListVector::GetListSize(source));
|
|
69
21
|
|
|
70
|
-
auto list_data =
|
|
22
|
+
auto list_data = ListVector::GetData(varchar_map);
|
|
71
23
|
auto key_data = FlatVector::GetData<string_t>(key_str);
|
|
72
24
|
auto val_data = FlatVector::GetData<string_t>(val_str);
|
|
73
25
|
auto &key_validity = FlatVector::Validity(key_str);
|
|
74
26
|
auto &val_validity = FlatVector::Validity(val_str);
|
|
27
|
+
auto &struct_validity = FlatVector::Validity(ListVector::GetEntry(varchar_map));
|
|
75
28
|
|
|
76
29
|
auto result_data = FlatVector::GetData<string_t>(result);
|
|
77
30
|
for (idx_t i = 0; i < count; i++) {
|
|
@@ -86,8 +39,15 @@ static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastPa
|
|
|
86
39
|
ret += ", ";
|
|
87
40
|
}
|
|
88
41
|
auto idx = list.offset + list_idx;
|
|
42
|
+
|
|
43
|
+
if (!struct_validity.RowIsValid(idx)) {
|
|
44
|
+
ret += "NULL";
|
|
45
|
+
continue;
|
|
46
|
+
}
|
|
89
47
|
if (!key_validity.RowIsValid(idx)) {
|
|
90
|
-
throw InternalException("Error in map: key validity invalid?!");
|
|
48
|
+
// throw InternalException("Error in map: key validity invalid?!");
|
|
49
|
+
ret += "invalid";
|
|
50
|
+
continue;
|
|
91
51
|
}
|
|
92
52
|
ret += key_data[idx].GetString();
|
|
93
53
|
ret += "=";
|
|
@@ -106,12 +66,11 @@ static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastPa
|
|
|
106
66
|
BoundCastInfo DefaultCasts::MapCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
|
|
107
67
|
switch (target.id()) {
|
|
108
68
|
case LogicalTypeId::MAP:
|
|
109
|
-
return BoundCastInfo(
|
|
69
|
+
return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
|
|
110
70
|
case LogicalTypeId::JSON:
|
|
111
71
|
case LogicalTypeId::VARCHAR: {
|
|
112
|
-
// bind a cast in which we convert the key/value to VARCHAR entries
|
|
113
72
|
auto varchar_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
|
|
114
|
-
return BoundCastInfo(MapToVarcharCast,
|
|
73
|
+
return BoundCastInfo(MapToVarcharCast, ListBoundCastData::BindListToListCast(input, source, varchar_type));
|
|
115
74
|
}
|
|
116
75
|
default:
|
|
117
76
|
return TryVectorNullCast;
|
|
@@ -140,7 +140,6 @@ static void ListContainsOrPosition(DataChunk &args, ExpressionState &state, Vect
|
|
|
140
140
|
case PhysicalType::VARCHAR:
|
|
141
141
|
TemplatedContainsOrPosition<string_t, T, OP>(args, state, result);
|
|
142
142
|
break;
|
|
143
|
-
case PhysicalType::MAP:
|
|
144
143
|
case PhysicalType::STRUCT:
|
|
145
144
|
case PhysicalType::LIST:
|
|
146
145
|
TemplatedContainsOrPosition<int8_t, T, OP>(args, state, result, true);
|
|
@@ -8,17 +8,14 @@ namespace duckdb {
|
|
|
8
8
|
|
|
9
9
|
static void CardinalityFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
10
10
|
auto &map = args.data[0];
|
|
11
|
-
UnifiedVectorFormat list_data;
|
|
12
11
|
UnifiedVectorFormat map_data;
|
|
13
12
|
result.SetVectorType(VectorType::FLAT_VECTOR);
|
|
14
13
|
auto result_data = FlatVector::GetData<uint64_t>(result);
|
|
15
14
|
auto &result_validity = FlatVector::Validity(result);
|
|
16
15
|
|
|
17
16
|
map.ToUnifiedFormat(args.size(), map_data);
|
|
18
|
-
auto &children = StructVector::GetEntries(map);
|
|
19
|
-
children[0]->ToUnifiedFormat(args.size(), list_data);
|
|
20
17
|
for (idx_t row = 0; row < args.size(); row++) {
|
|
21
|
-
auto list_entry = ((list_entry_t *)
|
|
18
|
+
auto list_entry = ((list_entry_t *)map_data.data)[map_data.sel->get_index(row)];
|
|
22
19
|
result_data[row] = list_entry.length;
|
|
23
20
|
result_validity.Set(row, map_data.validity.RowIsValid(map_data.sel->get_index(row)));
|
|
24
21
|
}
|
|
@@ -2,31 +2,25 @@
|
|
|
2
2
|
#include "duckdb/common/string_util.hpp"
|
|
3
3
|
#include "duckdb/parser/expression/bound_expression.hpp"
|
|
4
4
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
|
5
|
-
#include "duckdb/function/aggregate/nested_functions.hpp"
|
|
6
5
|
#include "duckdb/common/types/data_chunk.hpp"
|
|
7
6
|
#include "duckdb/common/pair.hpp"
|
|
8
7
|
#include "duckdb/common/types/value_map.hpp"
|
|
9
8
|
|
|
10
9
|
namespace duckdb {
|
|
11
10
|
|
|
12
|
-
// TODO: this doesn't recursively verify maps if maps are nested
|
|
13
11
|
MapInvalidReason CheckMapValidity(Vector &map, idx_t count, const SelectionVector &sel) {
|
|
14
12
|
D_ASSERT(map.GetType().id() == LogicalTypeId::MAP);
|
|
15
13
|
UnifiedVectorFormat map_vdata;
|
|
14
|
+
|
|
16
15
|
map.ToUnifiedFormat(count, map_vdata);
|
|
17
16
|
auto &map_validity = map_vdata.validity;
|
|
18
17
|
|
|
19
|
-
auto
|
|
18
|
+
auto list_data = ListVector::GetData(map);
|
|
19
|
+
auto &keys = MapVector::GetKeys(map);
|
|
20
20
|
UnifiedVectorFormat key_vdata;
|
|
21
|
-
|
|
22
|
-
auto key_data = (list_entry_t *)key_vdata.data;
|
|
21
|
+
keys.ToUnifiedFormat(count, key_vdata);
|
|
23
22
|
auto &key_validity = key_vdata.validity;
|
|
24
23
|
|
|
25
|
-
auto &key_entries = ListVector::GetEntry(key_vector);
|
|
26
|
-
UnifiedVectorFormat key_entry_vdata;
|
|
27
|
-
key_entries.ToUnifiedFormat(count, key_entry_vdata);
|
|
28
|
-
auto &entry_validity = key_entry_vdata.validity;
|
|
29
|
-
|
|
30
24
|
for (idx_t row = 0; row < count; row++) {
|
|
31
25
|
auto mapped_row = sel.get_index(row);
|
|
32
26
|
auto row_idx = map_vdata.sel->get_index(mapped_row);
|
|
@@ -35,17 +29,14 @@ MapInvalidReason CheckMapValidity(Vector &map, idx_t count, const SelectionVecto
|
|
|
35
29
|
continue;
|
|
36
30
|
}
|
|
37
31
|
row_idx = key_vdata.sel->get_index(row);
|
|
38
|
-
if (!key_validity.RowIsValid(row_idx)) {
|
|
39
|
-
return MapInvalidReason::NULL_KEY_LIST;
|
|
40
|
-
}
|
|
41
32
|
value_set_t unique_keys;
|
|
42
|
-
for (idx_t i = 0; i <
|
|
43
|
-
auto index =
|
|
44
|
-
index =
|
|
45
|
-
if (!
|
|
33
|
+
for (idx_t i = 0; i < list_data[row_idx].length; i++) {
|
|
34
|
+
auto index = list_data[row_idx].offset + i;
|
|
35
|
+
index = key_vdata.sel->get_index(index);
|
|
36
|
+
if (!key_validity.RowIsValid(index)) {
|
|
46
37
|
return MapInvalidReason::NULL_KEY;
|
|
47
38
|
}
|
|
48
|
-
auto value =
|
|
39
|
+
auto value = keys.GetValue(index);
|
|
49
40
|
auto result = unique_keys.insert(value);
|
|
50
41
|
if (!result.second) {
|
|
51
42
|
return MapInvalidReason::DUPLICATE_KEY;
|
|
@@ -86,28 +77,19 @@ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result)
|
|
|
86
77
|
}
|
|
87
78
|
}
|
|
88
79
|
|
|
89
|
-
auto &
|
|
90
|
-
|
|
91
|
-
auto
|
|
92
|
-
auto &value_vector = *child_entries[1];
|
|
93
|
-
if (args.data.empty()) {
|
|
94
|
-
// no arguments: construct an empty map
|
|
95
|
-
ListVector::SetListSize(key_vector, 0);
|
|
96
|
-
key_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
97
|
-
auto list_data = ConstantVector::GetData<list_entry_t>(key_vector);
|
|
98
|
-
list_data->offset = 0;
|
|
99
|
-
list_data->length = 0;
|
|
80
|
+
auto &key_vector = MapVector::GetKeys(result);
|
|
81
|
+
auto &value_vector = MapVector::GetValues(result);
|
|
82
|
+
auto list_data = ListVector::GetData(result);
|
|
100
83
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
list_data = ConstantVector::GetData<list_entry_t>(value_vector);
|
|
84
|
+
if (args.data.empty()) {
|
|
85
|
+
ListVector::SetListSize(result, 0);
|
|
104
86
|
list_data->offset = 0;
|
|
105
87
|
list_data->length = 0;
|
|
106
|
-
|
|
107
88
|
result.Verify(args.size());
|
|
108
89
|
return;
|
|
109
90
|
}
|
|
110
91
|
|
|
92
|
+
auto args_data = ListVector::GetData(args.data[0]);
|
|
111
93
|
auto key_count = ListVector::GetListSize(args.data[0]);
|
|
112
94
|
auto value_count = ListVector::GetListSize(args.data[1]);
|
|
113
95
|
if (key_count != value_count) {
|
|
@@ -115,11 +97,16 @@ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result)
|
|
|
115
97
|
"Error in MAP creation: key list has a different size from value list (%lld keys, %lld values)", key_count,
|
|
116
98
|
value_count);
|
|
117
99
|
}
|
|
100
|
+
ListVector::Reserve(result, key_count);
|
|
101
|
+
ListVector::SetListSize(result, key_count);
|
|
118
102
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
103
|
+
for (idx_t i = 0; i < args.size(); i++) {
|
|
104
|
+
list_data[i] = args_data[i];
|
|
105
|
+
}
|
|
122
106
|
|
|
107
|
+
key_vector.Reference(ListVector::GetEntry(args.data[0]));
|
|
108
|
+
value_vector.Reference(ListVector::GetEntry(args.data[1]));
|
|
109
|
+
MapConversionVerify(result, args.size());
|
|
123
110
|
result.Verify(args.size());
|
|
124
111
|
}
|
|
125
112
|
|
|
@@ -147,8 +134,9 @@ static unique_ptr<FunctionData> MapBind(ClientContext &context, ScalarFunction &
|
|
|
147
134
|
child_types.push_back(make_pair("value", empty));
|
|
148
135
|
}
|
|
149
136
|
|
|
150
|
-
|
|
151
|
-
|
|
137
|
+
bound_function.return_type =
|
|
138
|
+
LogicalType::MAP(ListType::GetChildType(child_types[0].second), ListType::GetChildType(child_types[1].second));
|
|
139
|
+
|
|
152
140
|
return make_unique<VariableReturnBindData>(bound_function.return_type);
|
|
153
141
|
}
|
|
154
142
|
|
|
@@ -27,7 +27,6 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
|
|
|
27
27
|
|
|
28
28
|
if (args.data[1].GetType().id() == LogicalTypeId::SQLNULL) {
|
|
29
29
|
//! We don't need to look through the map if the 'key' to look for is NULL
|
|
30
|
-
//! Because maps can't have NULL as key
|
|
31
30
|
ListVector::SetListSize(result, 0);
|
|
32
31
|
result.SetVectorType(VectorType::CONSTANT_VECTOR);
|
|
33
32
|
auto list_data = ConstantVector::GetData<list_entry_t>(result);
|
|
@@ -40,21 +39,23 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
|
|
|
40
39
|
auto &map = args.data[0];
|
|
41
40
|
auto &key = args.data[1];
|
|
42
41
|
|
|
43
|
-
UnifiedVectorFormat
|
|
42
|
+
UnifiedVectorFormat map_data;
|
|
44
43
|
UnifiedVectorFormat key_data;
|
|
45
44
|
|
|
46
45
|
auto &map_keys = MapVector::GetKeys(map);
|
|
47
46
|
auto &map_values = MapVector::GetValues(map);
|
|
48
47
|
|
|
49
|
-
|
|
48
|
+
map.ToUnifiedFormat(args.size(), map_data);
|
|
50
49
|
key.ToUnifiedFormat(args.size(), key_data);
|
|
51
50
|
|
|
52
51
|
for (idx_t row = 0; row < args.size(); row++) {
|
|
53
|
-
idx_t row_index =
|
|
52
|
+
idx_t row_index = map_data.sel->get_index(row);
|
|
54
53
|
idx_t key_index = key_data.sel->get_index(row);
|
|
55
54
|
auto key_value = key.GetValue(key_index);
|
|
56
|
-
|
|
57
|
-
|
|
55
|
+
|
|
56
|
+
list_entry_t entry = ListVector::GetData(map)[row_index];
|
|
57
|
+
auto offsets = MapVector::Search(map_keys, args.size(), key_value, entry);
|
|
58
|
+
auto values = FlatVector::GetValuesFromOffsets(map_values, offsets);
|
|
58
59
|
FillResult(values, result, row);
|
|
59
60
|
}
|
|
60
61
|
|
|
@@ -3,124 +3,14 @@
|
|
|
3
3
|
#include "duckdb/parser/expression/bound_expression.hpp"
|
|
4
4
|
#include "duckdb/function/scalar/nested_functions.hpp"
|
|
5
5
|
#include "duckdb/common/types/data_chunk.hpp"
|
|
6
|
-
#include "duckdb/common/pair.hpp"
|
|
7
6
|
|
|
8
7
|
namespace duckdb {
|
|
9
8
|
|
|
10
|
-
struct VectorInfo {
|
|
11
|
-
Vector &container;
|
|
12
|
-
list_entry_t &data;
|
|
13
|
-
};
|
|
14
|
-
|
|
15
|
-
static void MapStruct(Value &element, VectorInfo &keys, VectorInfo &values) {
|
|
16
|
-
D_ASSERT(element.type().id() == LogicalTypeId::STRUCT);
|
|
17
|
-
D_ASSERT(!element.IsNull());
|
|
18
|
-
auto &key_value = StructValue::GetChildren(element);
|
|
19
|
-
auto &key = key_value[0];
|
|
20
|
-
auto &value = key_value[1];
|
|
21
|
-
|
|
22
|
-
if (key.IsNull()) {
|
|
23
|
-
throw InvalidInputException("None of the keys of the map can be NULL");
|
|
24
|
-
}
|
|
25
|
-
// Add to the inner key/value lists of the resulting map
|
|
26
|
-
ListVector::PushBack(keys.container, key);
|
|
27
|
-
ListVector::PushBack(values.container, value);
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
// FIXME: this operation has a time complexity of O(n^2)
|
|
31
|
-
void CheckKeyUniqueness(VectorInfo &keys) {
|
|
32
|
-
auto end = keys.data.offset + keys.data.length;
|
|
33
|
-
auto &entries = ListVector::GetEntry(keys.container);
|
|
34
|
-
for (auto lhs = keys.data.offset; lhs < end; lhs++) {
|
|
35
|
-
auto element = entries.GetValue(lhs);
|
|
36
|
-
D_ASSERT(!element.IsNull());
|
|
37
|
-
for (auto rhs = lhs + 1; rhs < end; rhs++) {
|
|
38
|
-
auto other = entries.GetValue(rhs);
|
|
39
|
-
D_ASSERT(!other.IsNull());
|
|
40
|
-
|
|
41
|
-
if (element.type() != other.type()) {
|
|
42
|
-
throw InvalidInputException("Not all keys are of the same type!");
|
|
43
|
-
}
|
|
44
|
-
if (element == other) {
|
|
45
|
-
throw InvalidInputException("The given keys aren't unique");
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
static bool MapSingleList(VectorInfo &input, VectorInfo &keys, VectorInfo &values) {
|
|
52
|
-
// Get the length and offset of this list from the argument data
|
|
53
|
-
auto pair_amount = input.data.length;
|
|
54
|
-
auto input_offset = input.data.offset;
|
|
55
|
-
|
|
56
|
-
// Loop over the list of structs
|
|
57
|
-
idx_t inserted_values = 0;
|
|
58
|
-
for (idx_t i = 0; i < pair_amount; i++) {
|
|
59
|
-
auto index = i + input_offset;
|
|
60
|
-
// Get the struct using the offset and the index;
|
|
61
|
-
auto element = input.container.GetValue(index);
|
|
62
|
-
if (element.IsNull()) {
|
|
63
|
-
continue;
|
|
64
|
-
}
|
|
65
|
-
MapStruct(element, keys, values);
|
|
66
|
-
inserted_values++;
|
|
67
|
-
}
|
|
68
|
-
// Set the length of the key value lists
|
|
69
|
-
keys.data.length = inserted_values;
|
|
70
|
-
values.data.length = inserted_values;
|
|
71
|
-
return inserted_values != 0;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
9
|
static void MapFromEntriesFunction(DataChunk &args, ExpressionState &state, Vector &result) {
|
|
75
|
-
D_ASSERT(result.GetType().id() == LogicalTypeId::MAP);
|
|
76
|
-
|
|
77
|
-
result.SetVectorType(duckdb::VectorType::FLAT_VECTOR);
|
|
78
|
-
|
|
79
|
-
// Get the arguments vector
|
|
80
|
-
auto &input_list = args.data[0];
|
|
81
|
-
auto arg_data = FlatVector::GetData<list_entry_t>(input_list);
|
|
82
|
-
auto &entries = ListVector::GetEntry(input_list);
|
|
83
|
-
|
|
84
|
-
// Prepare the result vectors
|
|
85
|
-
auto &child_entries = StructVector::GetEntries(result);
|
|
86
|
-
D_ASSERT(child_entries.size() == 2);
|
|
87
|
-
auto &key_vector = *child_entries[0];
|
|
88
|
-
auto &value_vector = *child_entries[1];
|
|
89
|
-
auto &result_validity = FlatVector::Validity(result);
|
|
90
|
-
|
|
91
|
-
// Get the offset+length data for the list(s)
|
|
92
|
-
auto key_data = FlatVector::GetData<list_entry_t>(key_vector);
|
|
93
|
-
auto value_data = FlatVector::GetData<list_entry_t>(value_vector);
|
|
94
|
-
|
|
95
|
-
auto &key_validity = FlatVector::Validity(key_vector);
|
|
96
|
-
auto &value_validity = FlatVector::Validity(value_vector);
|
|
97
|
-
|
|
98
10
|
auto count = args.size();
|
|
99
11
|
|
|
100
|
-
|
|
101
|
-
input_list.ToUnifiedFormat(count, input_list_data);
|
|
102
|
-
|
|
103
|
-
// Current offset into the keys/values list
|
|
104
|
-
idx_t offset = 0;
|
|
105
|
-
|
|
106
|
-
// Transform to mapped values
|
|
107
|
-
for (idx_t i = 0; i < count; i++) {
|
|
108
|
-
VectorInfo input {entries, arg_data[i]};
|
|
109
|
-
VectorInfo keys {key_vector, key_data[i]};
|
|
110
|
-
VectorInfo values {value_vector, value_data[i]};
|
|
12
|
+
result.Reinterpret(args.data[0]);
|
|
111
13
|
|
|
112
|
-
keys.data.offset = offset;
|
|
113
|
-
values.data.offset = offset;
|
|
114
|
-
auto row_valid = MapSingleList(input, keys, values);
|
|
115
|
-
offset += keys.data.length;
|
|
116
|
-
|
|
117
|
-
// Check validity
|
|
118
|
-
if (!row_valid || !input_list_data.validity.RowIsValid(i)) {
|
|
119
|
-
key_validity.SetInvalid(i);
|
|
120
|
-
value_validity.SetInvalid(i);
|
|
121
|
-
result_validity.SetInvalid(i);
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
14
|
MapConversionVerify(result, count);
|
|
125
15
|
result.Verify(count);
|
|
126
16
|
|
|
@@ -131,8 +21,6 @@ static void MapFromEntriesFunction(DataChunk &args, ExpressionState &state, Vect
|
|
|
131
21
|
|
|
132
22
|
static unique_ptr<FunctionData> MapFromEntriesBind(ClientContext &context, ScalarFunction &bound_function,
|
|
133
23
|
vector<unique_ptr<Expression>> &arguments) {
|
|
134
|
-
child_list_t<LogicalType> child_types;
|
|
135
|
-
|
|
136
24
|
if (arguments.size() != 1) {
|
|
137
25
|
throw InvalidInputException("The input argument must be a list of structs.");
|
|
138
26
|
}
|
|
@@ -155,11 +43,8 @@ static unique_ptr<FunctionData> MapFromEntriesBind(ClientContext &context, Scala
|
|
|
155
43
|
if (children.size() != 2) {
|
|
156
44
|
throw InvalidInputException("The provided struct type should only contain 2 fields, a key and a value");
|
|
157
45
|
}
|
|
158
|
-
child_types.push_back(make_pair("key", LogicalType::LIST(children[0].second)));
|
|
159
|
-
child_types.push_back(make_pair("value", LogicalType::LIST(children[1].second)));
|
|
160
46
|
|
|
161
|
-
|
|
162
|
-
bound_function.return_type = LogicalType::MAP(move(child_types));
|
|
47
|
+
bound_function.return_type = LogicalType::MAP(elem_type);
|
|
163
48
|
return make_unique<VariableReturnBindData>(bound_function.return_type);
|
|
164
49
|
}
|
|
165
50
|
|
|
@@ -219,6 +219,7 @@ static unique_ptr<FunctionData> BindAggregateState(ClientContext &context, Scala
|
|
|
219
219
|
// FIXME: this is really hacky
|
|
220
220
|
// but the aggregate state export needs a rework around how it handles more complex aggregates anyway
|
|
221
221
|
vector<unique_ptr<Expression>> args;
|
|
222
|
+
args.reserve(state_type.bound_argument_types.size());
|
|
222
223
|
for (auto &arg_type : state_type.bound_argument_types) {
|
|
223
224
|
args.push_back(make_unique<BoundConstantExpression>(Value(arg_type)));
|
|
224
225
|
}
|
|
@@ -302,7 +303,6 @@ ExportAggregateFunction::Bind(unique_ptr<BoundAggregateExpression> child_aggrega
|
|
|
302
303
|
// this should be required
|
|
303
304
|
D_ASSERT(bound_function.state_size);
|
|
304
305
|
D_ASSERT(bound_function.finalize);
|
|
305
|
-
D_ASSERT(!bound_function.window);
|
|
306
306
|
|
|
307
307
|
D_ASSERT(child_aggregate->function.return_type.id() != LogicalTypeId::INVALID);
|
|
308
308
|
#ifdef DEBUG
|
|
@@ -124,17 +124,13 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
|
|
|
124
124
|
return LogicalType::STRUCT(move(child_types));
|
|
125
125
|
|
|
126
126
|
} else if (format == "+m") {
|
|
127
|
-
|
|
128
|
-
//! First type will be struct, so we skip it
|
|
129
|
-
auto &struct_schema = *schema.children[0];
|
|
130
|
-
for (idx_t type_idx = 0; type_idx < (idx_t)struct_schema.n_children; type_idx++) {
|
|
131
|
-
//! The other types must be added on lists
|
|
132
|
-
auto child_type = GetArrowLogicalType(*struct_schema.children[type_idx], arrow_convert_data, col_idx);
|
|
127
|
+
arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
|
|
133
128
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
129
|
+
auto &arrow_struct_type = *schema.children[0];
|
|
130
|
+
D_ASSERT(arrow_struct_type.n_children == 2);
|
|
131
|
+
auto key_type = GetArrowLogicalType(*arrow_struct_type.children[0], arrow_convert_data, col_idx);
|
|
132
|
+
auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1], arrow_convert_data, col_idx);
|
|
133
|
+
return LogicalType::MAP(key_type, value_type);
|
|
138
134
|
} else if (format == "z") {
|
|
139
135
|
arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
|
|
140
136
|
return LogicalType::BLOB;
|
|
@@ -227,47 +227,6 @@ void ArrowToDuckDBMapVerify(Vector &vector, idx_t count) {
|
|
|
227
227
|
}
|
|
228
228
|
}
|
|
229
229
|
|
|
230
|
-
void ArrowToDuckDBMapList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
|
|
231
|
-
unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
|
|
232
|
-
pair<idx_t, idx_t> &arrow_convert_idx, uint32_t *offsets, ValidityMask *parent_mask) {
|
|
233
|
-
idx_t list_size = offsets[size] - offsets[0];
|
|
234
|
-
ListVector::Reserve(vector, list_size);
|
|
235
|
-
|
|
236
|
-
auto &child_vector = ListVector::GetEntry(vector);
|
|
237
|
-
auto list_data = FlatVector::GetData<list_entry_t>(vector);
|
|
238
|
-
auto cur_offset = 0;
|
|
239
|
-
for (idx_t i = 0; i < size; i++) {
|
|
240
|
-
auto &le = list_data[i];
|
|
241
|
-
le.offset = cur_offset;
|
|
242
|
-
le.length = offsets[i + 1] - offsets[i];
|
|
243
|
-
cur_offset += le.length;
|
|
244
|
-
}
|
|
245
|
-
ListVector::SetListSize(vector, list_size);
|
|
246
|
-
if (list_size == 0 && offsets[0] == 0) {
|
|
247
|
-
SetValidityMask(child_vector, array, scan_state, list_size, -1);
|
|
248
|
-
} else {
|
|
249
|
-
SetValidityMask(child_vector, array, scan_state, list_size, offsets[0]);
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
auto &list_mask = FlatVector::Validity(vector);
|
|
253
|
-
if (parent_mask) {
|
|
254
|
-
//! Since this List is owned by a struct we must guarantee their validity map matches on Null
|
|
255
|
-
if (!parent_mask->AllValid()) {
|
|
256
|
-
for (idx_t i = 0; i < size; i++) {
|
|
257
|
-
if (!parent_mask->RowIsValid(i)) {
|
|
258
|
-
list_mask.SetInvalid(i);
|
|
259
|
-
}
|
|
260
|
-
}
|
|
261
|
-
}
|
|
262
|
-
}
|
|
263
|
-
if (list_size == 0 && offsets[0] == 0) {
|
|
264
|
-
ColumnArrowToDuckDB(child_vector, array, scan_state, list_size, arrow_convert_data, col_idx, arrow_convert_idx,
|
|
265
|
-
-1);
|
|
266
|
-
} else {
|
|
267
|
-
ColumnArrowToDuckDB(child_vector, array, scan_state, list_size, arrow_convert_data, col_idx, arrow_convert_idx,
|
|
268
|
-
offsets[0]);
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
230
|
template <class T>
|
|
272
231
|
static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets) {
|
|
273
232
|
auto strings = FlatVector::GetData<string_t>(vector);
|
|
@@ -619,20 +578,8 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
|
|
|
619
578
|
break;
|
|
620
579
|
}
|
|
621
580
|
case LogicalTypeId::MAP: {
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
auto &child_entries = StructVector::GetEntries(vector);
|
|
625
|
-
D_ASSERT(child_entries.size() == 2);
|
|
626
|
-
auto offsets = (uint32_t *)array.buffers[1] + array.offset + scan_state.chunk_offset;
|
|
627
|
-
if (nested_offset != -1) {
|
|
628
|
-
offsets = (uint32_t *)array.buffers[1] + nested_offset;
|
|
629
|
-
}
|
|
630
|
-
auto &struct_validity_mask = FlatVector::Validity(vector);
|
|
631
|
-
//! Fill the children
|
|
632
|
-
for (idx_t type_idx = 0; type_idx < (idx_t)struct_arrow.n_children; type_idx++) {
|
|
633
|
-
ArrowToDuckDBMapList(*child_entries[type_idx], *struct_arrow.children[type_idx], scan_state, size,
|
|
634
|
-
arrow_convert_data, col_idx, arrow_convert_idx, offsets, &struct_validity_mask);
|
|
635
|
-
}
|
|
581
|
+
ArrowToDuckDBList(vector, array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx,
|
|
582
|
+
nested_offset, parent_mask);
|
|
636
583
|
ArrowToDuckDBMapVerify(vector, size);
|
|
637
584
|
break;
|
|
638
585
|
}
|