duckdb 0.6.2-dev914.0 → 0.6.2-dev939.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/json_functions/json_create.cpp +6 -12
  3. package/src/duckdb/extension/parquet/column_writer.cpp +6 -5
  4. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -8
  5. package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -15
  6. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -5
  7. package/src/duckdb/src/common/row_operations/row_gather.cpp +0 -1
  8. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -1
  9. package/src/duckdb/src/common/row_operations/row_scatter.cpp +0 -2
  10. package/src/duckdb/src/common/sort/sort_state.cpp +0 -2
  11. package/src/duckdb/src/common/types/value.cpp +6 -8
  12. package/src/duckdb/src/common/types/vector.cpp +40 -43
  13. package/src/duckdb/src/common/types.cpp +20 -25
  14. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +0 -1
  15. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +0 -2
  16. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +0 -2
  17. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +0 -1
  18. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +0 -1
  19. package/src/duckdb/src/function/aggregate/nested/histogram.cpp +7 -24
  20. package/src/duckdb/src/function/cast/list_casts.cpp +3 -3
  21. package/src/duckdb/src/function/cast/map_cast.cpp +19 -60
  22. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +0 -1
  23. package/src/duckdb/src/function/scalar/map/cardinality.cpp +1 -4
  24. package/src/duckdb/src/function/scalar/map/map.cpp +26 -38
  25. package/src/duckdb/src/function/scalar/map/map_extract.cpp +7 -6
  26. package/src/duckdb/src/function/scalar/map/map_from_entries.cpp +2 -117
  27. package/src/duckdb/src/function/table/arrow.cpp +6 -10
  28. package/src/duckdb/src/function/table/arrow_conversion.cpp +2 -55
  29. package/src/duckdb/src/function/table/system/test_all_types.cpp +14 -3
  30. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  31. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -2
  32. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +2 -2
  33. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -3
  34. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +4 -0
  35. package/src/duckdb/src/main/capi/logical_types-c.cpp +3 -2
  36. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +1 -8
  37. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +11 -9
  38. package/src/duckdb/src/planner/expression_binder.cpp +6 -6
  39. package/src/duckdb/src/storage/storage_info.cpp +1 -1
@@ -12,7 +12,7 @@ unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &i
12
12
  return make_unique<ListBoundCastData>(move(child_cast));
13
13
  }
14
14
 
15
- static bool ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
15
+ bool ListCast::ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
16
16
  auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
17
17
 
18
18
  // only handle constant and flat vectors here for now
@@ -53,7 +53,7 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
53
53
  auto constant = source.GetVectorType() == VectorType::CONSTANT_VECTOR;
54
54
  // first cast the child vector to varchar
55
55
  Vector varchar_list(LogicalType::LIST(LogicalType::VARCHAR), count);
56
- ListToListCast(source, varchar_list, count, parameters);
56
+ ListCast::ListToListCast(source, varchar_list, count, parameters);
57
57
 
58
58
  // now construct the actual varchar vector
59
59
  varchar_list.Flatten(count);
@@ -116,7 +116,7 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
116
116
  BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
117
117
  switch (target.id()) {
118
118
  case LogicalTypeId::LIST:
119
- return BoundCastInfo(ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
119
+ return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
120
120
  case LogicalTypeId::VARCHAR:
121
121
  case LogicalTypeId::JSON:
122
122
  return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
@@ -3,75 +3,28 @@
3
3
 
4
4
  namespace duckdb {
5
5
 
6
- struct MapBoundCastData : public BoundCastData {
7
- MapBoundCastData(BoundCastInfo key_cast, BoundCastInfo value_cast)
8
- : key_cast(move(key_cast)), value_cast(move(value_cast)) {
9
- }
10
-
11
- BoundCastInfo key_cast;
12
- BoundCastInfo value_cast;
13
-
14
- public:
15
- unique_ptr<BoundCastData> Copy() const override {
16
- return make_unique<MapBoundCastData>(key_cast.Copy(), value_cast.Copy());
17
- }
18
- };
19
-
20
- unique_ptr<BoundCastData> BindMapToMapCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
21
- vector<BoundCastInfo> child_cast_info;
22
- auto source_key = LogicalType::LIST(MapType::KeyType(source));
23
- auto target_key = LogicalType::LIST(MapType::KeyType(target));
24
- auto source_val = LogicalType::LIST(MapType::ValueType(source));
25
- auto target_val = LogicalType::LIST(MapType::ValueType(target));
26
- auto key_cast = input.GetCastFunction(source_key, target_key);
27
- auto value_cast = input.GetCastFunction(source_val, target_val);
28
- return make_unique<MapBoundCastData>(move(key_cast), move(value_cast));
29
- }
30
-
31
- static bool MapToMapCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
32
- auto &cast_data = (MapBoundCastData &)*parameters.cast_data;
33
- CastParameters key_params(parameters, cast_data.key_cast.cast_data.get());
34
- if (!cast_data.key_cast.function(MapVector::GetKeys(source), MapVector::GetKeys(result), count, key_params)) {
35
- return false;
36
- }
37
- CastParameters val_params(parameters, cast_data.value_cast.cast_data.get());
38
- if (!cast_data.value_cast.function(MapVector::GetValues(source), MapVector::GetValues(result), count, val_params)) {
39
- return false;
40
- }
41
- if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) {
42
- result.SetVectorType(VectorType::CONSTANT_VECTOR);
43
- ConstantVector::SetNull(result, ConstantVector::IsNull(source));
44
- } else {
45
- source.Flatten(count);
46
- FlatVector::Validity(result) = FlatVector::Validity(source);
47
- }
48
- return true;
49
- }
50
-
51
6
  static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
52
7
  auto constant = source.GetVectorType() == VectorType::CONSTANT_VECTOR;
53
- // first cast the child elements to varchar
54
8
  auto varchar_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
55
9
  Vector varchar_map(varchar_type, count);
56
- MapToMapCast(source, varchar_map, count, parameters);
57
10
 
58
- // now construct the actual varchar vector
59
- varchar_map.Flatten(count);
11
+ // since map's physical type is a list, the ListCast can be utilized
12
+ ListCast::ListToListCast(source, varchar_map, count, parameters);
60
13
 
14
+ varchar_map.Flatten(count);
61
15
  auto &validity = FlatVector::Validity(varchar_map);
62
- auto &key_lists = MapVector::GetKeys(varchar_map);
63
- auto &val_lists = MapVector::GetValues(varchar_map);
64
- auto &key_str = ListVector::GetEntry(key_lists);
65
- auto &val_str = ListVector::GetEntry(val_lists);
16
+ auto &key_str = MapVector::GetKeys(varchar_map);
17
+ auto &val_str = MapVector::GetValues(varchar_map);
66
18
 
67
- key_str.Flatten(ListVector::GetListSize(key_lists));
68
- val_str.Flatten(ListVector::GetListSize(val_lists));
19
+ key_str.Flatten(ListVector::GetListSize(source));
20
+ val_str.Flatten(ListVector::GetListSize(source));
69
21
 
70
- auto list_data = FlatVector::GetData<list_entry_t>(key_lists);
22
+ auto list_data = ListVector::GetData(varchar_map);
71
23
  auto key_data = FlatVector::GetData<string_t>(key_str);
72
24
  auto val_data = FlatVector::GetData<string_t>(val_str);
73
25
  auto &key_validity = FlatVector::Validity(key_str);
74
26
  auto &val_validity = FlatVector::Validity(val_str);
27
+ auto &struct_validity = FlatVector::Validity(ListVector::GetEntry(varchar_map));
75
28
 
76
29
  auto result_data = FlatVector::GetData<string_t>(result);
77
30
  for (idx_t i = 0; i < count; i++) {
@@ -86,8 +39,15 @@ static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastPa
86
39
  ret += ", ";
87
40
  }
88
41
  auto idx = list.offset + list_idx;
42
+
43
+ if (!struct_validity.RowIsValid(idx)) {
44
+ ret += "NULL";
45
+ continue;
46
+ }
89
47
  if (!key_validity.RowIsValid(idx)) {
90
- throw InternalException("Error in map: key validity invalid?!");
48
+ // throw InternalException("Error in map: key validity invalid?!");
49
+ ret += "invalid";
50
+ continue;
91
51
  }
92
52
  ret += key_data[idx].GetString();
93
53
  ret += "=";
@@ -106,12 +66,11 @@ static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastPa
106
66
  BoundCastInfo DefaultCasts::MapCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
107
67
  switch (target.id()) {
108
68
  case LogicalTypeId::MAP:
109
- return BoundCastInfo(MapToMapCast, BindMapToMapCast(input, source, target));
69
+ return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
110
70
  case LogicalTypeId::JSON:
111
71
  case LogicalTypeId::VARCHAR: {
112
- // bind a cast in which we convert the key/value to VARCHAR entries
113
72
  auto varchar_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
114
- return BoundCastInfo(MapToVarcharCast, BindMapToMapCast(input, source, varchar_type));
73
+ return BoundCastInfo(MapToVarcharCast, ListBoundCastData::BindListToListCast(input, source, varchar_type));
115
74
  }
116
75
  default:
117
76
  return TryVectorNullCast;
@@ -140,7 +140,6 @@ static void ListContainsOrPosition(DataChunk &args, ExpressionState &state, Vect
140
140
  case PhysicalType::VARCHAR:
141
141
  TemplatedContainsOrPosition<string_t, T, OP>(args, state, result);
142
142
  break;
143
- case PhysicalType::MAP:
144
143
  case PhysicalType::STRUCT:
145
144
  case PhysicalType::LIST:
146
145
  TemplatedContainsOrPosition<int8_t, T, OP>(args, state, result, true);
@@ -8,17 +8,14 @@ namespace duckdb {
8
8
 
9
9
  static void CardinalityFunction(DataChunk &args, ExpressionState &state, Vector &result) {
10
10
  auto &map = args.data[0];
11
- UnifiedVectorFormat list_data;
12
11
  UnifiedVectorFormat map_data;
13
12
  result.SetVectorType(VectorType::FLAT_VECTOR);
14
13
  auto result_data = FlatVector::GetData<uint64_t>(result);
15
14
  auto &result_validity = FlatVector::Validity(result);
16
15
 
17
16
  map.ToUnifiedFormat(args.size(), map_data);
18
- auto &children = StructVector::GetEntries(map);
19
- children[0]->ToUnifiedFormat(args.size(), list_data);
20
17
  for (idx_t row = 0; row < args.size(); row++) {
21
- auto list_entry = ((list_entry_t *)list_data.data)[list_data.sel->get_index(row)];
18
+ auto list_entry = ((list_entry_t *)map_data.data)[map_data.sel->get_index(row)];
22
19
  result_data[row] = list_entry.length;
23
20
  result_validity.Set(row, map_data.validity.RowIsValid(map_data.sel->get_index(row)));
24
21
  }
@@ -2,31 +2,25 @@
2
2
  #include "duckdb/common/string_util.hpp"
3
3
  #include "duckdb/parser/expression/bound_expression.hpp"
4
4
  #include "duckdb/function/scalar/nested_functions.hpp"
5
- #include "duckdb/function/aggregate/nested_functions.hpp"
6
5
  #include "duckdb/common/types/data_chunk.hpp"
7
6
  #include "duckdb/common/pair.hpp"
8
7
  #include "duckdb/common/types/value_map.hpp"
9
8
 
10
9
  namespace duckdb {
11
10
 
12
- // TODO: this doesn't recursively verify maps if maps are nested
13
11
  MapInvalidReason CheckMapValidity(Vector &map, idx_t count, const SelectionVector &sel) {
14
12
  D_ASSERT(map.GetType().id() == LogicalTypeId::MAP);
15
13
  UnifiedVectorFormat map_vdata;
14
+
16
15
  map.ToUnifiedFormat(count, map_vdata);
17
16
  auto &map_validity = map_vdata.validity;
18
17
 
19
- auto &key_vector = *(StructVector::GetEntries(map)[0]);
18
+ auto list_data = ListVector::GetData(map);
19
+ auto &keys = MapVector::GetKeys(map);
20
20
  UnifiedVectorFormat key_vdata;
21
- key_vector.ToUnifiedFormat(count, key_vdata);
22
- auto key_data = (list_entry_t *)key_vdata.data;
21
+ keys.ToUnifiedFormat(count, key_vdata);
23
22
  auto &key_validity = key_vdata.validity;
24
23
 
25
- auto &key_entries = ListVector::GetEntry(key_vector);
26
- UnifiedVectorFormat key_entry_vdata;
27
- key_entries.ToUnifiedFormat(count, key_entry_vdata);
28
- auto &entry_validity = key_entry_vdata.validity;
29
-
30
24
  for (idx_t row = 0; row < count; row++) {
31
25
  auto mapped_row = sel.get_index(row);
32
26
  auto row_idx = map_vdata.sel->get_index(mapped_row);
@@ -35,17 +29,14 @@ MapInvalidReason CheckMapValidity(Vector &map, idx_t count, const SelectionVecto
35
29
  continue;
36
30
  }
37
31
  row_idx = key_vdata.sel->get_index(row);
38
- if (!key_validity.RowIsValid(row_idx)) {
39
- return MapInvalidReason::NULL_KEY_LIST;
40
- }
41
32
  value_set_t unique_keys;
42
- for (idx_t i = 0; i < key_data[row_idx].length; i++) {
43
- auto index = key_data[row_idx].offset + i;
44
- index = key_entry_vdata.sel->get_index(index);
45
- if (!entry_validity.RowIsValid(index)) {
33
+ for (idx_t i = 0; i < list_data[row_idx].length; i++) {
34
+ auto index = list_data[row_idx].offset + i;
35
+ index = key_vdata.sel->get_index(index);
36
+ if (!key_validity.RowIsValid(index)) {
46
37
  return MapInvalidReason::NULL_KEY;
47
38
  }
48
- auto value = key_entries.GetValue(index);
39
+ auto value = keys.GetValue(index);
49
40
  auto result = unique_keys.insert(value);
50
41
  if (!result.second) {
51
42
  return MapInvalidReason::DUPLICATE_KEY;
@@ -86,28 +77,19 @@ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result)
86
77
  }
87
78
  }
88
79
 
89
- auto &child_entries = StructVector::GetEntries(result);
90
- D_ASSERT(child_entries.size() == 2);
91
- auto &key_vector = *child_entries[0];
92
- auto &value_vector = *child_entries[1];
93
- if (args.data.empty()) {
94
- // no arguments: construct an empty map
95
- ListVector::SetListSize(key_vector, 0);
96
- key_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
97
- auto list_data = ConstantVector::GetData<list_entry_t>(key_vector);
98
- list_data->offset = 0;
99
- list_data->length = 0;
80
+ auto &key_vector = MapVector::GetKeys(result);
81
+ auto &value_vector = MapVector::GetValues(result);
82
+ auto list_data = ListVector::GetData(result);
100
83
 
101
- ListVector::SetListSize(value_vector, 0);
102
- value_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
103
- list_data = ConstantVector::GetData<list_entry_t>(value_vector);
84
+ if (args.data.empty()) {
85
+ ListVector::SetListSize(result, 0);
104
86
  list_data->offset = 0;
105
87
  list_data->length = 0;
106
-
107
88
  result.Verify(args.size());
108
89
  return;
109
90
  }
110
91
 
92
+ auto args_data = ListVector::GetData(args.data[0]);
111
93
  auto key_count = ListVector::GetListSize(args.data[0]);
112
94
  auto value_count = ListVector::GetListSize(args.data[1]);
113
95
  if (key_count != value_count) {
@@ -115,11 +97,16 @@ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result)
115
97
  "Error in MAP creation: key list has a different size from value list (%lld keys, %lld values)", key_count,
116
98
  value_count);
117
99
  }
100
+ ListVector::Reserve(result, key_count);
101
+ ListVector::SetListSize(result, key_count);
118
102
 
119
- key_vector.Reference(args.data[0]);
120
- value_vector.Reference(args.data[1]);
121
- MapConversionVerify(result, args.size());
103
+ for (idx_t i = 0; i < args.size(); i++) {
104
+ list_data[i] = args_data[i];
105
+ }
122
106
 
107
+ key_vector.Reference(ListVector::GetEntry(args.data[0]));
108
+ value_vector.Reference(ListVector::GetEntry(args.data[1]));
109
+ MapConversionVerify(result, args.size());
123
110
  result.Verify(args.size());
124
111
  }
125
112
 
@@ -147,8 +134,9 @@ static unique_ptr<FunctionData> MapBind(ClientContext &context, ScalarFunction &
147
134
  child_types.push_back(make_pair("value", empty));
148
135
  }
149
136
 
150
- //! this is more for completeness reasons
151
- bound_function.return_type = LogicalType::MAP(move(child_types));
137
+ bound_function.return_type =
138
+ LogicalType::MAP(ListType::GetChildType(child_types[0].second), ListType::GetChildType(child_types[1].second));
139
+
152
140
  return make_unique<VariableReturnBindData>(bound_function.return_type);
153
141
  }
154
142
 
@@ -27,7 +27,6 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
27
27
 
28
28
  if (args.data[1].GetType().id() == LogicalTypeId::SQLNULL) {
29
29
  //! We don't need to look through the map if the 'key' to look for is NULL
30
- //! Because maps can't have NULL as key
31
30
  ListVector::SetListSize(result, 0);
32
31
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
33
32
  auto list_data = ConstantVector::GetData<list_entry_t>(result);
@@ -40,21 +39,23 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
40
39
  auto &map = args.data[0];
41
40
  auto &key = args.data[1];
42
41
 
43
- UnifiedVectorFormat map_keys_data;
42
+ UnifiedVectorFormat map_data;
44
43
  UnifiedVectorFormat key_data;
45
44
 
46
45
  auto &map_keys = MapVector::GetKeys(map);
47
46
  auto &map_values = MapVector::GetValues(map);
48
47
 
49
- map_keys.ToUnifiedFormat(args.size(), map_keys_data);
48
+ map.ToUnifiedFormat(args.size(), map_data);
50
49
  key.ToUnifiedFormat(args.size(), key_data);
51
50
 
52
51
  for (idx_t row = 0; row < args.size(); row++) {
53
- idx_t row_index = map_keys_data.sel->get_index(row);
52
+ idx_t row_index = map_data.sel->get_index(row);
54
53
  idx_t key_index = key_data.sel->get_index(row);
55
54
  auto key_value = key.GetValue(key_index);
56
- auto offsets = ListVector::Search(map_keys, key_value, row_index);
57
- auto values = ListVector::GetValuesFromOffsets(map_values, offsets);
55
+
56
+ list_entry_t entry = ListVector::GetData(map)[row_index];
57
+ auto offsets = MapVector::Search(map_keys, args.size(), key_value, entry);
58
+ auto values = FlatVector::GetValuesFromOffsets(map_values, offsets);
58
59
  FillResult(values, result, row);
59
60
  }
60
61
 
@@ -3,124 +3,14 @@
3
3
  #include "duckdb/parser/expression/bound_expression.hpp"
4
4
  #include "duckdb/function/scalar/nested_functions.hpp"
5
5
  #include "duckdb/common/types/data_chunk.hpp"
6
- #include "duckdb/common/pair.hpp"
7
6
 
8
7
  namespace duckdb {
9
8
 
10
- struct VectorInfo {
11
- Vector &container;
12
- list_entry_t &data;
13
- };
14
-
15
- static void MapStruct(Value &element, VectorInfo &keys, VectorInfo &values) {
16
- D_ASSERT(element.type().id() == LogicalTypeId::STRUCT);
17
- D_ASSERT(!element.IsNull());
18
- auto &key_value = StructValue::GetChildren(element);
19
- auto &key = key_value[0];
20
- auto &value = key_value[1];
21
-
22
- if (key.IsNull()) {
23
- throw InvalidInputException("None of the keys of the map can be NULL");
24
- }
25
- // Add to the inner key/value lists of the resulting map
26
- ListVector::PushBack(keys.container, key);
27
- ListVector::PushBack(values.container, value);
28
- }
29
-
30
- // FIXME: this operation has a time complexity of O(n^2)
31
- void CheckKeyUniqueness(VectorInfo &keys) {
32
- auto end = keys.data.offset + keys.data.length;
33
- auto &entries = ListVector::GetEntry(keys.container);
34
- for (auto lhs = keys.data.offset; lhs < end; lhs++) {
35
- auto element = entries.GetValue(lhs);
36
- D_ASSERT(!element.IsNull());
37
- for (auto rhs = lhs + 1; rhs < end; rhs++) {
38
- auto other = entries.GetValue(rhs);
39
- D_ASSERT(!other.IsNull());
40
-
41
- if (element.type() != other.type()) {
42
- throw InvalidInputException("Not all keys are of the same type!");
43
- }
44
- if (element == other) {
45
- throw InvalidInputException("The given keys aren't unique");
46
- }
47
- }
48
- }
49
- }
50
-
51
- static bool MapSingleList(VectorInfo &input, VectorInfo &keys, VectorInfo &values) {
52
- // Get the length and offset of this list from the argument data
53
- auto pair_amount = input.data.length;
54
- auto input_offset = input.data.offset;
55
-
56
- // Loop over the list of structs
57
- idx_t inserted_values = 0;
58
- for (idx_t i = 0; i < pair_amount; i++) {
59
- auto index = i + input_offset;
60
- // Get the struct using the offset and the index;
61
- auto element = input.container.GetValue(index);
62
- if (element.IsNull()) {
63
- continue;
64
- }
65
- MapStruct(element, keys, values);
66
- inserted_values++;
67
- }
68
- // Set the length of the key value lists
69
- keys.data.length = inserted_values;
70
- values.data.length = inserted_values;
71
- return inserted_values != 0;
72
- }
73
-
74
9
  static void MapFromEntriesFunction(DataChunk &args, ExpressionState &state, Vector &result) {
75
- D_ASSERT(result.GetType().id() == LogicalTypeId::MAP);
76
-
77
- result.SetVectorType(duckdb::VectorType::FLAT_VECTOR);
78
-
79
- // Get the arguments vector
80
- auto &input_list = args.data[0];
81
- auto arg_data = FlatVector::GetData<list_entry_t>(input_list);
82
- auto &entries = ListVector::GetEntry(input_list);
83
-
84
- // Prepare the result vectors
85
- auto &child_entries = StructVector::GetEntries(result);
86
- D_ASSERT(child_entries.size() == 2);
87
- auto &key_vector = *child_entries[0];
88
- auto &value_vector = *child_entries[1];
89
- auto &result_validity = FlatVector::Validity(result);
90
-
91
- // Get the offset+length data for the list(s)
92
- auto key_data = FlatVector::GetData<list_entry_t>(key_vector);
93
- auto value_data = FlatVector::GetData<list_entry_t>(value_vector);
94
-
95
- auto &key_validity = FlatVector::Validity(key_vector);
96
- auto &value_validity = FlatVector::Validity(value_vector);
97
-
98
10
  auto count = args.size();
99
11
 
100
- UnifiedVectorFormat input_list_data;
101
- input_list.ToUnifiedFormat(count, input_list_data);
102
-
103
- // Current offset into the keys/values list
104
- idx_t offset = 0;
105
-
106
- // Transform to mapped values
107
- for (idx_t i = 0; i < count; i++) {
108
- VectorInfo input {entries, arg_data[i]};
109
- VectorInfo keys {key_vector, key_data[i]};
110
- VectorInfo values {value_vector, value_data[i]};
12
+ result.Reinterpret(args.data[0]);
111
13
 
112
- keys.data.offset = offset;
113
- values.data.offset = offset;
114
- auto row_valid = MapSingleList(input, keys, values);
115
- offset += keys.data.length;
116
-
117
- // Check validity
118
- if (!row_valid || !input_list_data.validity.RowIsValid(i)) {
119
- key_validity.SetInvalid(i);
120
- value_validity.SetInvalid(i);
121
- result_validity.SetInvalid(i);
122
- }
123
- }
124
14
  MapConversionVerify(result, count);
125
15
  result.Verify(count);
126
16
 
@@ -131,8 +21,6 @@ static void MapFromEntriesFunction(DataChunk &args, ExpressionState &state, Vect
131
21
 
132
22
  static unique_ptr<FunctionData> MapFromEntriesBind(ClientContext &context, ScalarFunction &bound_function,
133
23
  vector<unique_ptr<Expression>> &arguments) {
134
- child_list_t<LogicalType> child_types;
135
-
136
24
  if (arguments.size() != 1) {
137
25
  throw InvalidInputException("The input argument must be a list of structs.");
138
26
  }
@@ -155,11 +43,8 @@ static unique_ptr<FunctionData> MapFromEntriesBind(ClientContext &context, Scala
155
43
  if (children.size() != 2) {
156
44
  throw InvalidInputException("The provided struct type should only contain 2 fields, a key and a value");
157
45
  }
158
- child_types.push_back(make_pair("key", LogicalType::LIST(children[0].second)));
159
- child_types.push_back(make_pair("value", LogicalType::LIST(children[1].second)));
160
46
 
161
- //! this is more for completeness reasons
162
- bound_function.return_type = LogicalType::MAP(move(child_types));
47
+ bound_function.return_type = LogicalType::MAP(elem_type);
163
48
  return make_unique<VariableReturnBindData>(bound_function.return_type);
164
49
  }
165
50
 
@@ -124,17 +124,13 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
124
124
  return LogicalType::STRUCT(move(child_types));
125
125
 
126
126
  } else if (format == "+m") {
127
- child_list_t<LogicalType> child_types;
128
- //! First type will be struct, so we skip it
129
- auto &struct_schema = *schema.children[0];
130
- for (idx_t type_idx = 0; type_idx < (idx_t)struct_schema.n_children; type_idx++) {
131
- //! The other types must be added on lists
132
- auto child_type = GetArrowLogicalType(*struct_schema.children[type_idx], arrow_convert_data, col_idx);
127
+ arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
133
128
 
134
- auto list_type = LogicalType::LIST(child_type);
135
- child_types.push_back({struct_schema.children[type_idx]->name, list_type});
136
- }
137
- return LogicalType::MAP(move(child_types));
129
+ auto &arrow_struct_type = *schema.children[0];
130
+ D_ASSERT(arrow_struct_type.n_children == 2);
131
+ auto key_type = GetArrowLogicalType(*arrow_struct_type.children[0], arrow_convert_data, col_idx);
132
+ auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1], arrow_convert_data, col_idx);
133
+ return LogicalType::MAP(key_type, value_type);
138
134
  } else if (format == "z") {
139
135
  arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
140
136
  return LogicalType::BLOB;
@@ -227,47 +227,6 @@ void ArrowToDuckDBMapVerify(Vector &vector, idx_t count) {
227
227
  }
228
228
  }
229
229
 
230
- void ArrowToDuckDBMapList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
231
- unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
232
- pair<idx_t, idx_t> &arrow_convert_idx, uint32_t *offsets, ValidityMask *parent_mask) {
233
- idx_t list_size = offsets[size] - offsets[0];
234
- ListVector::Reserve(vector, list_size);
235
-
236
- auto &child_vector = ListVector::GetEntry(vector);
237
- auto list_data = FlatVector::GetData<list_entry_t>(vector);
238
- auto cur_offset = 0;
239
- for (idx_t i = 0; i < size; i++) {
240
- auto &le = list_data[i];
241
- le.offset = cur_offset;
242
- le.length = offsets[i + 1] - offsets[i];
243
- cur_offset += le.length;
244
- }
245
- ListVector::SetListSize(vector, list_size);
246
- if (list_size == 0 && offsets[0] == 0) {
247
- SetValidityMask(child_vector, array, scan_state, list_size, -1);
248
- } else {
249
- SetValidityMask(child_vector, array, scan_state, list_size, offsets[0]);
250
- }
251
-
252
- auto &list_mask = FlatVector::Validity(vector);
253
- if (parent_mask) {
254
- //! Since this List is owned by a struct we must guarantee their validity map matches on Null
255
- if (!parent_mask->AllValid()) {
256
- for (idx_t i = 0; i < size; i++) {
257
- if (!parent_mask->RowIsValid(i)) {
258
- list_mask.SetInvalid(i);
259
- }
260
- }
261
- }
262
- }
263
- if (list_size == 0 && offsets[0] == 0) {
264
- ColumnArrowToDuckDB(child_vector, array, scan_state, list_size, arrow_convert_data, col_idx, arrow_convert_idx,
265
- -1);
266
- } else {
267
- ColumnArrowToDuckDB(child_vector, array, scan_state, list_size, arrow_convert_data, col_idx, arrow_convert_idx,
268
- offsets[0]);
269
- }
270
- }
271
230
  template <class T>
272
231
  static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets) {
273
232
  auto strings = FlatVector::GetData<string_t>(vector);
@@ -619,20 +578,8 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
619
578
  break;
620
579
  }
621
580
  case LogicalTypeId::MAP: {
622
- //! Since this is a map we skip first child, because its a struct
623
- auto &struct_arrow = *array.children[0];
624
- auto &child_entries = StructVector::GetEntries(vector);
625
- D_ASSERT(child_entries.size() == 2);
626
- auto offsets = (uint32_t *)array.buffers[1] + array.offset + scan_state.chunk_offset;
627
- if (nested_offset != -1) {
628
- offsets = (uint32_t *)array.buffers[1] + nested_offset;
629
- }
630
- auto &struct_validity_mask = FlatVector::Validity(vector);
631
- //! Fill the children
632
- for (idx_t type_idx = 0; type_idx < (idx_t)struct_arrow.n_children; type_idx++) {
633
- ArrowToDuckDBMapList(*child_entries[type_idx], *struct_arrow.children[type_idx], scan_state, size,
634
- arrow_convert_data, col_idx, arrow_convert_idx, offsets, &struct_validity_mask);
635
- }
581
+ ArrowToDuckDBList(vector, array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx,
582
+ nested_offset, parent_mask);
636
583
  ArrowToDuckDBMapVerify(vector, size);
637
584
  break;
638
585
  }
@@ -177,9 +177,20 @@ vector<TestType> TestAllTypesFun::GetTestTypes() {
177
177
 
178
178
  // map
179
179
  auto map_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
180
- auto min_map_value = Value::MAP(Value::EMPTYLIST(LogicalType::VARCHAR), Value::EMPTYLIST(LogicalType::VARCHAR));
181
- auto max_map_value = Value::MAP(Value::LIST({Value("key1"), Value("key2")}),
182
- Value::LIST({Value("🦆🦆🦆🦆🦆🦆"), Value("goose")}));
180
+ auto min_map_value = Value::MAP(ListType::GetChildType(map_type), std::vector<Value>());
181
+
182
+ child_list_t<Value> map_struct1;
183
+ map_struct1.push_back(make_pair("key", Value("key1")));
184
+ map_struct1.push_back(make_pair("value", Value("🦆🦆🦆🦆🦆🦆")));
185
+ child_list_t<Value> map_struct2;
186
+ map_struct2.push_back(make_pair("key", Value("key2")));
187
+ map_struct2.push_back(make_pair("key", Value("goose")));
188
+
189
+ std::vector<Value> map_values;
190
+ map_values.push_back(Value::STRUCT(map_struct1));
191
+ map_values.push_back(Value::STRUCT(map_struct2));
192
+
193
+ auto max_map_value = Value::MAP(ListType::GetChildType(map_type), map_values);
183
194
  result.emplace_back(map_type, "map", move(min_map_value), move(max_map_value));
184
195
 
185
196
  return result;
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.6.2-dev914"
2
+ #define DUCKDB_VERSION "0.6.2-dev939"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "51eaae44c9"
5
+ #define DUCKDB_SOURCE_ID "db2bb06ef5"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -157,8 +157,8 @@ public:
157
157
  DUCKDB_API static Value LIST(LogicalType child_type, vector<Value> values);
158
158
  //! Create an empty list with the specified child-type
159
159
  DUCKDB_API static Value EMPTYLIST(LogicalType child_type);
160
- //! Create a map value from a (key, value) pair
161
- DUCKDB_API static Value MAP(Value key, Value value);
160
+ //! Create a map value with the given entries
161
+ DUCKDB_API static Value MAP(LogicalType child_type, vector<Value> values);
162
162
  //! Create a union value from a selected value and a tag from a set of alternatives.
163
163
  DUCKDB_API static Value UNION(child_list_t<LogicalType> members, uint8_t tag, Value value);
164
164
 
@@ -304,6 +304,7 @@ struct FlatVector {
304
304
  return !vector.validity.RowIsValid(idx);
305
305
  }
306
306
  DUCKDB_API static const SelectionVector *IncrementalSelectionVector();
307
+ static Value GetValuesFromOffsets(Vector &values, vector<idx_t> &offsets);
307
308
  };
308
309
 
309
310
  struct ListVector {
@@ -330,8 +331,6 @@ struct ListVector {
330
331
  DUCKDB_API static void Append(Vector &target, const Vector &source, const SelectionVector &sel, idx_t source_size,
331
332
  idx_t source_offset = 0);
332
333
  DUCKDB_API static void PushBack(Vector &target, const Value &insert);
333
- DUCKDB_API static vector<idx_t> Search(Vector &list, const Value &key, idx_t row);
334
- DUCKDB_API static Value GetValuesFromOffsets(Vector &list, vector<idx_t> &offsets);
335
334
  //! Share the entry of the other list vector
336
335
  DUCKDB_API static void ReferenceEntry(Vector &vector, Vector &other);
337
336
  };
@@ -409,6 +408,7 @@ struct MapVector {
409
408
  DUCKDB_API static const Vector &GetValues(const Vector &vector);
410
409
  DUCKDB_API static Vector &GetKeys(Vector &vector);
411
410
  DUCKDB_API static Vector &GetValues(Vector &vector);
411
+ static vector<idx_t> Search(Vector &keys, idx_t count, const Value &key, list_entry_t &entry);
412
412
  };
413
413
 
414
414
  struct StructVector {