duckdb 0.6.2-dev919.0 → 0.6.2-dev942.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/json_functions/json_create.cpp +6 -12
  3. package/src/duckdb/extension/parquet/column_writer.cpp +6 -5
  4. package/src/duckdb/extension/parquet/parquet_reader.cpp +6 -8
  5. package/src/duckdb/src/common/arrow/arrow_appender.cpp +3 -15
  6. package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -5
  7. package/src/duckdb/src/common/row_operations/row_gather.cpp +0 -1
  8. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -1
  9. package/src/duckdb/src/common/row_operations/row_scatter.cpp +0 -2
  10. package/src/duckdb/src/common/sort/sort_state.cpp +0 -2
  11. package/src/duckdb/src/common/types/value.cpp +6 -8
  12. package/src/duckdb/src/common/types/vector.cpp +40 -43
  13. package/src/duckdb/src/common/types.cpp +20 -25
  14. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +0 -1
  15. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +0 -2
  16. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +0 -2
  17. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +0 -1
  18. package/src/duckdb/src/execution/window_segment_tree.cpp +0 -17
  19. package/src/duckdb/src/function/aggregate/distributive/count.cpp +22 -0
  20. package/src/duckdb/src/function/aggregate/distributive/minmax.cpp +0 -1
  21. package/src/duckdb/src/function/aggregate/nested/histogram.cpp +7 -24
  22. package/src/duckdb/src/function/cast/list_casts.cpp +3 -3
  23. package/src/duckdb/src/function/cast/map_cast.cpp +19 -60
  24. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +0 -1
  25. package/src/duckdb/src/function/scalar/map/cardinality.cpp +1 -4
  26. package/src/duckdb/src/function/scalar/map/map.cpp +26 -38
  27. package/src/duckdb/src/function/scalar/map/map_extract.cpp +7 -6
  28. package/src/duckdb/src/function/scalar/map/map_from_entries.cpp +2 -117
  29. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +1 -1
  30. package/src/duckdb/src/function/table/arrow.cpp +6 -10
  31. package/src/duckdb/src/function/table/arrow_conversion.cpp +2 -55
  32. package/src/duckdb/src/function/table/system/test_all_types.cpp +14 -3
  33. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  34. package/src/duckdb/src/include/duckdb/common/types/value.hpp +2 -2
  35. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +2 -2
  36. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -3
  37. package/src/duckdb/src/include/duckdb/function/cast/default_casts.hpp +4 -0
  38. package/src/duckdb/src/main/capi/logical_types-c.cpp +3 -2
  39. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +1 -8
  40. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +11 -9
  41. package/src/duckdb/src/planner/expression_binder.cpp +6 -6
  42. package/src/duckdb/src/storage/storage_info.cpp +1 -1
@@ -268,7 +268,6 @@ static bool TemplatedOptimumValue(Vector &left, idx_t lidx, idx_t lcount, Vector
268
268
  return TemplatedOptimumType<string_t, OP>(left, lidx, lcount, right, ridx, rcount);
269
269
  case PhysicalType::LIST:
270
270
  return TemplatedOptimumList<OP>(left, lidx, lcount, right, ridx, rcount);
271
- case PhysicalType::MAP:
272
271
  case PhysicalType::STRUCT:
273
272
  return TemplatedOptimumStruct<OP>(left, lidx, lcount, right, ridx, rcount);
274
273
  default:
@@ -121,40 +121,26 @@ static void HistogramFinalizeFunction(Vector &state_vector, AggregateInputData &
121
121
  auto states = (HistogramAggState<T, MAP_TYPE> **)sdata.data;
122
122
 
123
123
  auto &mask = FlatVector::Validity(result);
124
-
125
- auto &child_entries = StructVector::GetEntries(result);
126
- auto &bucket_list = child_entries[0];
127
- auto &count_list = child_entries[1];
128
-
129
- auto old_len = ListVector::GetListSize(*bucket_list);
130
-
131
- auto &bucket_validity = FlatVector::Validity(*bucket_list);
132
- auto &count_validity = FlatVector::Validity(*count_list);
124
+ auto old_len = ListVector::GetListSize(result);
133
125
 
134
126
  for (idx_t i = 0; i < count; i++) {
135
-
136
127
  const auto rid = i + offset;
137
128
  auto state = states[sdata.sel->get_index(i)];
138
129
  if (!state->hist) {
139
130
  mask.SetInvalid(rid);
140
- bucket_validity.SetInvalid(rid);
141
- count_validity.SetInvalid(rid);
142
131
  continue;
143
132
  }
144
133
 
145
134
  for (auto &entry : *state->hist) {
146
135
  Value bucket_value = OP::template HistogramFinalize<T>(entry.first);
147
- ListVector::PushBack(*bucket_list, bucket_value);
148
136
  auto count_value = Value::CreateValue(entry.second);
149
- ListVector::PushBack(*count_list, count_value);
137
+ auto struct_value =
138
+ Value::STRUCT({std::make_pair("key", bucket_value), std::make_pair("value", count_value)});
139
+ ListVector::PushBack(result, struct_value);
150
140
  }
151
141
 
152
- auto list_struct_data = FlatVector::GetData<list_entry_t>(*bucket_list);
153
- list_struct_data[rid].length = ListVector::GetListSize(*bucket_list) - old_len;
154
- list_struct_data[rid].offset = old_len;
155
-
156
- list_struct_data = FlatVector::GetData<list_entry_t>(*count_list);
157
- list_struct_data[rid].length = ListVector::GetListSize(*count_list) - old_len;
142
+ auto list_struct_data = ListVector::GetData(result);
143
+ list_struct_data[rid].length = ListVector::GetListSize(result) - old_len;
158
144
  list_struct_data[rid].offset = old_len;
159
145
  old_len += list_struct_data[rid].length;
160
146
  }
@@ -171,10 +157,7 @@ unique_ptr<FunctionData> HistogramBindFunction(ClientContext &context, Aggregate
171
157
  throw NotImplementedException("Unimplemented type for histogram %s", arguments[0]->return_type.ToString());
172
158
  }
173
159
 
174
- child_list_t<LogicalType> struct_children;
175
- struct_children.push_back({"key", LogicalType::LIST(arguments[0]->return_type)});
176
- struct_children.push_back({"value", LogicalType::LIST(LogicalType::UBIGINT)});
177
- auto struct_type = LogicalType::MAP(move(struct_children));
160
+ auto struct_type = LogicalType::MAP(arguments[0]->return_type, LogicalType::UBIGINT);
178
161
 
179
162
  function.return_type = struct_type;
180
163
  return make_unique<VariableReturnBindData>(function.return_type);
@@ -12,7 +12,7 @@ unique_ptr<BoundCastData> ListBoundCastData::BindListToListCast(BindCastInput &i
12
12
  return make_unique<ListBoundCastData>(move(child_cast));
13
13
  }
14
14
 
15
- static bool ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
15
+ bool ListCast::ListToListCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
16
16
  auto &cast_data = (ListBoundCastData &)*parameters.cast_data;
17
17
 
18
18
  // only handle constant and flat vectors here for now
@@ -53,7 +53,7 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
53
53
  auto constant = source.GetVectorType() == VectorType::CONSTANT_VECTOR;
54
54
  // first cast the child vector to varchar
55
55
  Vector varchar_list(LogicalType::LIST(LogicalType::VARCHAR), count);
56
- ListToListCast(source, varchar_list, count, parameters);
56
+ ListCast::ListToListCast(source, varchar_list, count, parameters);
57
57
 
58
58
  // now construct the actual varchar vector
59
59
  varchar_list.Flatten(count);
@@ -116,7 +116,7 @@ static bool ListToVarcharCast(Vector &source, Vector &result, idx_t count, CastP
116
116
  BoundCastInfo DefaultCasts::ListCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
117
117
  switch (target.id()) {
118
118
  case LogicalTypeId::LIST:
119
- return BoundCastInfo(ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
119
+ return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
120
120
  case LogicalTypeId::VARCHAR:
121
121
  case LogicalTypeId::JSON:
122
122
  return BoundCastInfo(ListToVarcharCast, ListBoundCastData::BindListToListCast(
@@ -3,75 +3,28 @@
3
3
 
4
4
  namespace duckdb {
5
5
 
6
- struct MapBoundCastData : public BoundCastData {
7
- MapBoundCastData(BoundCastInfo key_cast, BoundCastInfo value_cast)
8
- : key_cast(move(key_cast)), value_cast(move(value_cast)) {
9
- }
10
-
11
- BoundCastInfo key_cast;
12
- BoundCastInfo value_cast;
13
-
14
- public:
15
- unique_ptr<BoundCastData> Copy() const override {
16
- return make_unique<MapBoundCastData>(key_cast.Copy(), value_cast.Copy());
17
- }
18
- };
19
-
20
- unique_ptr<BoundCastData> BindMapToMapCast(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
21
- vector<BoundCastInfo> child_cast_info;
22
- auto source_key = LogicalType::LIST(MapType::KeyType(source));
23
- auto target_key = LogicalType::LIST(MapType::KeyType(target));
24
- auto source_val = LogicalType::LIST(MapType::ValueType(source));
25
- auto target_val = LogicalType::LIST(MapType::ValueType(target));
26
- auto key_cast = input.GetCastFunction(source_key, target_key);
27
- auto value_cast = input.GetCastFunction(source_val, target_val);
28
- return make_unique<MapBoundCastData>(move(key_cast), move(value_cast));
29
- }
30
-
31
- static bool MapToMapCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
32
- auto &cast_data = (MapBoundCastData &)*parameters.cast_data;
33
- CastParameters key_params(parameters, cast_data.key_cast.cast_data.get());
34
- if (!cast_data.key_cast.function(MapVector::GetKeys(source), MapVector::GetKeys(result), count, key_params)) {
35
- return false;
36
- }
37
- CastParameters val_params(parameters, cast_data.value_cast.cast_data.get());
38
- if (!cast_data.value_cast.function(MapVector::GetValues(source), MapVector::GetValues(result), count, val_params)) {
39
- return false;
40
- }
41
- if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) {
42
- result.SetVectorType(VectorType::CONSTANT_VECTOR);
43
- ConstantVector::SetNull(result, ConstantVector::IsNull(source));
44
- } else {
45
- source.Flatten(count);
46
- FlatVector::Validity(result) = FlatVector::Validity(source);
47
- }
48
- return true;
49
- }
50
-
51
6
  static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
52
7
  auto constant = source.GetVectorType() == VectorType::CONSTANT_VECTOR;
53
- // first cast the child elements to varchar
54
8
  auto varchar_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
55
9
  Vector varchar_map(varchar_type, count);
56
- MapToMapCast(source, varchar_map, count, parameters);
57
10
 
58
- // now construct the actual varchar vector
59
- varchar_map.Flatten(count);
11
+ // since map's physical type is a list, the ListCast can be utilized
12
+ ListCast::ListToListCast(source, varchar_map, count, parameters);
60
13
 
14
+ varchar_map.Flatten(count);
61
15
  auto &validity = FlatVector::Validity(varchar_map);
62
- auto &key_lists = MapVector::GetKeys(varchar_map);
63
- auto &val_lists = MapVector::GetValues(varchar_map);
64
- auto &key_str = ListVector::GetEntry(key_lists);
65
- auto &val_str = ListVector::GetEntry(val_lists);
16
+ auto &key_str = MapVector::GetKeys(varchar_map);
17
+ auto &val_str = MapVector::GetValues(varchar_map);
66
18
 
67
- key_str.Flatten(ListVector::GetListSize(key_lists));
68
- val_str.Flatten(ListVector::GetListSize(val_lists));
19
+ key_str.Flatten(ListVector::GetListSize(source));
20
+ val_str.Flatten(ListVector::GetListSize(source));
69
21
 
70
- auto list_data = FlatVector::GetData<list_entry_t>(key_lists);
22
+ auto list_data = ListVector::GetData(varchar_map);
71
23
  auto key_data = FlatVector::GetData<string_t>(key_str);
72
24
  auto val_data = FlatVector::GetData<string_t>(val_str);
73
25
  auto &key_validity = FlatVector::Validity(key_str);
74
26
  auto &val_validity = FlatVector::Validity(val_str);
27
+ auto &struct_validity = FlatVector::Validity(ListVector::GetEntry(varchar_map));
75
28
 
76
29
  auto result_data = FlatVector::GetData<string_t>(result);
77
30
  for (idx_t i = 0; i < count; i++) {
@@ -86,8 +39,15 @@ static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastPa
86
39
  ret += ", ";
87
40
  }
88
41
  auto idx = list.offset + list_idx;
42
+
43
+ if (!struct_validity.RowIsValid(idx)) {
44
+ ret += "NULL";
45
+ continue;
46
+ }
89
47
  if (!key_validity.RowIsValid(idx)) {
90
- throw InternalException("Error in map: key validity invalid?!");
48
+ // throw InternalException("Error in map: key validity invalid?!");
49
+ ret += "invalid";
50
+ continue;
91
51
  }
92
52
  ret += key_data[idx].GetString();
93
53
  ret += "=";
@@ -106,12 +66,11 @@ static bool MapToVarcharCast(Vector &source, Vector &result, idx_t count, CastPa
106
66
  BoundCastInfo DefaultCasts::MapCastSwitch(BindCastInput &input, const LogicalType &source, const LogicalType &target) {
107
67
  switch (target.id()) {
108
68
  case LogicalTypeId::MAP:
109
- return BoundCastInfo(MapToMapCast, BindMapToMapCast(input, source, target));
69
+ return BoundCastInfo(ListCast::ListToListCast, ListBoundCastData::BindListToListCast(input, source, target));
110
70
  case LogicalTypeId::JSON:
111
71
  case LogicalTypeId::VARCHAR: {
112
- // bind a cast in which we convert the key/value to VARCHAR entries
113
72
  auto varchar_type = LogicalType::MAP(LogicalType::VARCHAR, LogicalType::VARCHAR);
114
- return BoundCastInfo(MapToVarcharCast, BindMapToMapCast(input, source, varchar_type));
73
+ return BoundCastInfo(MapToVarcharCast, ListBoundCastData::BindListToListCast(input, source, varchar_type));
115
74
  }
116
75
  default:
117
76
  return TryVectorNullCast;
@@ -140,7 +140,6 @@ static void ListContainsOrPosition(DataChunk &args, ExpressionState &state, Vect
140
140
  case PhysicalType::VARCHAR:
141
141
  TemplatedContainsOrPosition<string_t, T, OP>(args, state, result);
142
142
  break;
143
- case PhysicalType::MAP:
144
143
  case PhysicalType::STRUCT:
145
144
  case PhysicalType::LIST:
146
145
  TemplatedContainsOrPosition<int8_t, T, OP>(args, state, result, true);
@@ -8,17 +8,14 @@ namespace duckdb {
8
8
 
9
9
  static void CardinalityFunction(DataChunk &args, ExpressionState &state, Vector &result) {
10
10
  auto &map = args.data[0];
11
- UnifiedVectorFormat list_data;
12
11
  UnifiedVectorFormat map_data;
13
12
  result.SetVectorType(VectorType::FLAT_VECTOR);
14
13
  auto result_data = FlatVector::GetData<uint64_t>(result);
15
14
  auto &result_validity = FlatVector::Validity(result);
16
15
 
17
16
  map.ToUnifiedFormat(args.size(), map_data);
18
- auto &children = StructVector::GetEntries(map);
19
- children[0]->ToUnifiedFormat(args.size(), list_data);
20
17
  for (idx_t row = 0; row < args.size(); row++) {
21
- auto list_entry = ((list_entry_t *)list_data.data)[list_data.sel->get_index(row)];
18
+ auto list_entry = ((list_entry_t *)map_data.data)[map_data.sel->get_index(row)];
22
19
  result_data[row] = list_entry.length;
23
20
  result_validity.Set(row, map_data.validity.RowIsValid(map_data.sel->get_index(row)));
24
21
  }
@@ -2,31 +2,25 @@
2
2
  #include "duckdb/common/string_util.hpp"
3
3
  #include "duckdb/parser/expression/bound_expression.hpp"
4
4
  #include "duckdb/function/scalar/nested_functions.hpp"
5
- #include "duckdb/function/aggregate/nested_functions.hpp"
6
5
  #include "duckdb/common/types/data_chunk.hpp"
7
6
  #include "duckdb/common/pair.hpp"
8
7
  #include "duckdb/common/types/value_map.hpp"
9
8
 
10
9
  namespace duckdb {
11
10
 
12
- // TODO: this doesn't recursively verify maps if maps are nested
13
11
  MapInvalidReason CheckMapValidity(Vector &map, idx_t count, const SelectionVector &sel) {
14
12
  D_ASSERT(map.GetType().id() == LogicalTypeId::MAP);
15
13
  UnifiedVectorFormat map_vdata;
14
+
16
15
  map.ToUnifiedFormat(count, map_vdata);
17
16
  auto &map_validity = map_vdata.validity;
18
17
 
19
- auto &key_vector = *(StructVector::GetEntries(map)[0]);
18
+ auto list_data = ListVector::GetData(map);
19
+ auto &keys = MapVector::GetKeys(map);
20
20
  UnifiedVectorFormat key_vdata;
21
- key_vector.ToUnifiedFormat(count, key_vdata);
22
- auto key_data = (list_entry_t *)key_vdata.data;
21
+ keys.ToUnifiedFormat(count, key_vdata);
23
22
  auto &key_validity = key_vdata.validity;
24
23
 
25
- auto &key_entries = ListVector::GetEntry(key_vector);
26
- UnifiedVectorFormat key_entry_vdata;
27
- key_entries.ToUnifiedFormat(count, key_entry_vdata);
28
- auto &entry_validity = key_entry_vdata.validity;
29
-
30
24
  for (idx_t row = 0; row < count; row++) {
31
25
  auto mapped_row = sel.get_index(row);
32
26
  auto row_idx = map_vdata.sel->get_index(mapped_row);
@@ -35,17 +29,14 @@ MapInvalidReason CheckMapValidity(Vector &map, idx_t count, const SelectionVecto
35
29
  continue;
36
30
  }
37
31
  row_idx = key_vdata.sel->get_index(row);
38
- if (!key_validity.RowIsValid(row_idx)) {
39
- return MapInvalidReason::NULL_KEY_LIST;
40
- }
41
32
  value_set_t unique_keys;
42
- for (idx_t i = 0; i < key_data[row_idx].length; i++) {
43
- auto index = key_data[row_idx].offset + i;
44
- index = key_entry_vdata.sel->get_index(index);
45
- if (!entry_validity.RowIsValid(index)) {
33
+ for (idx_t i = 0; i < list_data[row_idx].length; i++) {
34
+ auto index = list_data[row_idx].offset + i;
35
+ index = key_vdata.sel->get_index(index);
36
+ if (!key_validity.RowIsValid(index)) {
46
37
  return MapInvalidReason::NULL_KEY;
47
38
  }
48
- auto value = key_entries.GetValue(index);
39
+ auto value = keys.GetValue(index);
49
40
  auto result = unique_keys.insert(value);
50
41
  if (!result.second) {
51
42
  return MapInvalidReason::DUPLICATE_KEY;
@@ -86,28 +77,19 @@ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result)
86
77
  }
87
78
  }
88
79
 
89
- auto &child_entries = StructVector::GetEntries(result);
90
- D_ASSERT(child_entries.size() == 2);
91
- auto &key_vector = *child_entries[0];
92
- auto &value_vector = *child_entries[1];
93
- if (args.data.empty()) {
94
- // no arguments: construct an empty map
95
- ListVector::SetListSize(key_vector, 0);
96
- key_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
97
- auto list_data = ConstantVector::GetData<list_entry_t>(key_vector);
98
- list_data->offset = 0;
99
- list_data->length = 0;
80
+ auto &key_vector = MapVector::GetKeys(result);
81
+ auto &value_vector = MapVector::GetValues(result);
82
+ auto list_data = ListVector::GetData(result);
100
83
 
101
- ListVector::SetListSize(value_vector, 0);
102
- value_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
103
- list_data = ConstantVector::GetData<list_entry_t>(value_vector);
84
+ if (args.data.empty()) {
85
+ ListVector::SetListSize(result, 0);
104
86
  list_data->offset = 0;
105
87
  list_data->length = 0;
106
-
107
88
  result.Verify(args.size());
108
89
  return;
109
90
  }
110
91
 
92
+ auto args_data = ListVector::GetData(args.data[0]);
111
93
  auto key_count = ListVector::GetListSize(args.data[0]);
112
94
  auto value_count = ListVector::GetListSize(args.data[1]);
113
95
  if (key_count != value_count) {
@@ -115,11 +97,16 @@ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result)
115
97
  "Error in MAP creation: key list has a different size from value list (%lld keys, %lld values)", key_count,
116
98
  value_count);
117
99
  }
100
+ ListVector::Reserve(result, key_count);
101
+ ListVector::SetListSize(result, key_count);
118
102
 
119
- key_vector.Reference(args.data[0]);
120
- value_vector.Reference(args.data[1]);
121
- MapConversionVerify(result, args.size());
103
+ for (idx_t i = 0; i < args.size(); i++) {
104
+ list_data[i] = args_data[i];
105
+ }
122
106
 
107
+ key_vector.Reference(ListVector::GetEntry(args.data[0]));
108
+ value_vector.Reference(ListVector::GetEntry(args.data[1]));
109
+ MapConversionVerify(result, args.size());
123
110
  result.Verify(args.size());
124
111
  }
125
112
 
@@ -147,8 +134,9 @@ static unique_ptr<FunctionData> MapBind(ClientContext &context, ScalarFunction &
147
134
  child_types.push_back(make_pair("value", empty));
148
135
  }
149
136
 
150
- //! this is more for completeness reasons
151
- bound_function.return_type = LogicalType::MAP(move(child_types));
137
+ bound_function.return_type =
138
+ LogicalType::MAP(ListType::GetChildType(child_types[0].second), ListType::GetChildType(child_types[1].second));
139
+
152
140
  return make_unique<VariableReturnBindData>(bound_function.return_type);
153
141
  }
154
142
 
@@ -27,7 +27,6 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
27
27
 
28
28
  if (args.data[1].GetType().id() == LogicalTypeId::SQLNULL) {
29
29
  //! We don't need to look through the map if the 'key' to look for is NULL
30
- //! Because maps can't have NULL as key
31
30
  ListVector::SetListSize(result, 0);
32
31
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
33
32
  auto list_data = ConstantVector::GetData<list_entry_t>(result);
@@ -40,21 +39,23 @@ static void MapExtractFunction(DataChunk &args, ExpressionState &state, Vector &
40
39
  auto &map = args.data[0];
41
40
  auto &key = args.data[1];
42
41
 
43
- UnifiedVectorFormat map_keys_data;
42
+ UnifiedVectorFormat map_data;
44
43
  UnifiedVectorFormat key_data;
45
44
 
46
45
  auto &map_keys = MapVector::GetKeys(map);
47
46
  auto &map_values = MapVector::GetValues(map);
48
47
 
49
- map_keys.ToUnifiedFormat(args.size(), map_keys_data);
48
+ map.ToUnifiedFormat(args.size(), map_data);
50
49
  key.ToUnifiedFormat(args.size(), key_data);
51
50
 
52
51
  for (idx_t row = 0; row < args.size(); row++) {
53
- idx_t row_index = map_keys_data.sel->get_index(row);
52
+ idx_t row_index = map_data.sel->get_index(row);
54
53
  idx_t key_index = key_data.sel->get_index(row);
55
54
  auto key_value = key.GetValue(key_index);
56
- auto offsets = ListVector::Search(map_keys, key_value, row_index);
57
- auto values = ListVector::GetValuesFromOffsets(map_values, offsets);
55
+
56
+ list_entry_t entry = ListVector::GetData(map)[row_index];
57
+ auto offsets = MapVector::Search(map_keys, args.size(), key_value, entry);
58
+ auto values = FlatVector::GetValuesFromOffsets(map_values, offsets);
58
59
  FillResult(values, result, row);
59
60
  }
60
61
 
@@ -3,124 +3,14 @@
3
3
  #include "duckdb/parser/expression/bound_expression.hpp"
4
4
  #include "duckdb/function/scalar/nested_functions.hpp"
5
5
  #include "duckdb/common/types/data_chunk.hpp"
6
- #include "duckdb/common/pair.hpp"
7
6
 
8
7
  namespace duckdb {
9
8
 
10
- struct VectorInfo {
11
- Vector &container;
12
- list_entry_t &data;
13
- };
14
-
15
- static void MapStruct(Value &element, VectorInfo &keys, VectorInfo &values) {
16
- D_ASSERT(element.type().id() == LogicalTypeId::STRUCT);
17
- D_ASSERT(!element.IsNull());
18
- auto &key_value = StructValue::GetChildren(element);
19
- auto &key = key_value[0];
20
- auto &value = key_value[1];
21
-
22
- if (key.IsNull()) {
23
- throw InvalidInputException("None of the keys of the map can be NULL");
24
- }
25
- // Add to the inner key/value lists of the resulting map
26
- ListVector::PushBack(keys.container, key);
27
- ListVector::PushBack(values.container, value);
28
- }
29
-
30
- // FIXME: this operation has a time complexity of O(n^2)
31
- void CheckKeyUniqueness(VectorInfo &keys) {
32
- auto end = keys.data.offset + keys.data.length;
33
- auto &entries = ListVector::GetEntry(keys.container);
34
- for (auto lhs = keys.data.offset; lhs < end; lhs++) {
35
- auto element = entries.GetValue(lhs);
36
- D_ASSERT(!element.IsNull());
37
- for (auto rhs = lhs + 1; rhs < end; rhs++) {
38
- auto other = entries.GetValue(rhs);
39
- D_ASSERT(!other.IsNull());
40
-
41
- if (element.type() != other.type()) {
42
- throw InvalidInputException("Not all keys are of the same type!");
43
- }
44
- if (element == other) {
45
- throw InvalidInputException("The given keys aren't unique");
46
- }
47
- }
48
- }
49
- }
50
-
51
- static bool MapSingleList(VectorInfo &input, VectorInfo &keys, VectorInfo &values) {
52
- // Get the length and offset of this list from the argument data
53
- auto pair_amount = input.data.length;
54
- auto input_offset = input.data.offset;
55
-
56
- // Loop over the list of structs
57
- idx_t inserted_values = 0;
58
- for (idx_t i = 0; i < pair_amount; i++) {
59
- auto index = i + input_offset;
60
- // Get the struct using the offset and the index;
61
- auto element = input.container.GetValue(index);
62
- if (element.IsNull()) {
63
- continue;
64
- }
65
- MapStruct(element, keys, values);
66
- inserted_values++;
67
- }
68
- // Set the length of the key value lists
69
- keys.data.length = inserted_values;
70
- values.data.length = inserted_values;
71
- return inserted_values != 0;
72
- }
73
-
74
9
  static void MapFromEntriesFunction(DataChunk &args, ExpressionState &state, Vector &result) {
75
- D_ASSERT(result.GetType().id() == LogicalTypeId::MAP);
76
-
77
- result.SetVectorType(duckdb::VectorType::FLAT_VECTOR);
78
-
79
- // Get the arguments vector
80
- auto &input_list = args.data[0];
81
- auto arg_data = FlatVector::GetData<list_entry_t>(input_list);
82
- auto &entries = ListVector::GetEntry(input_list);
83
-
84
- // Prepare the result vectors
85
- auto &child_entries = StructVector::GetEntries(result);
86
- D_ASSERT(child_entries.size() == 2);
87
- auto &key_vector = *child_entries[0];
88
- auto &value_vector = *child_entries[1];
89
- auto &result_validity = FlatVector::Validity(result);
90
-
91
- // Get the offset+length data for the list(s)
92
- auto key_data = FlatVector::GetData<list_entry_t>(key_vector);
93
- auto value_data = FlatVector::GetData<list_entry_t>(value_vector);
94
-
95
- auto &key_validity = FlatVector::Validity(key_vector);
96
- auto &value_validity = FlatVector::Validity(value_vector);
97
-
98
10
  auto count = args.size();
99
11
 
100
- UnifiedVectorFormat input_list_data;
101
- input_list.ToUnifiedFormat(count, input_list_data);
102
-
103
- // Current offset into the keys/values list
104
- idx_t offset = 0;
105
-
106
- // Transform to mapped values
107
- for (idx_t i = 0; i < count; i++) {
108
- VectorInfo input {entries, arg_data[i]};
109
- VectorInfo keys {key_vector, key_data[i]};
110
- VectorInfo values {value_vector, value_data[i]};
12
+ result.Reinterpret(args.data[0]);
111
13
 
112
- keys.data.offset = offset;
113
- values.data.offset = offset;
114
- auto row_valid = MapSingleList(input, keys, values);
115
- offset += keys.data.length;
116
-
117
- // Check validity
118
- if (!row_valid || !input_list_data.validity.RowIsValid(i)) {
119
- key_validity.SetInvalid(i);
120
- value_validity.SetInvalid(i);
121
- result_validity.SetInvalid(i);
122
- }
123
- }
124
14
  MapConversionVerify(result, count);
125
15
  result.Verify(count);
126
16
 
@@ -131,8 +21,6 @@ static void MapFromEntriesFunction(DataChunk &args, ExpressionState &state, Vect
131
21
 
132
22
  static unique_ptr<FunctionData> MapFromEntriesBind(ClientContext &context, ScalarFunction &bound_function,
133
23
  vector<unique_ptr<Expression>> &arguments) {
134
- child_list_t<LogicalType> child_types;
135
-
136
24
  if (arguments.size() != 1) {
137
25
  throw InvalidInputException("The input argument must be a list of structs.");
138
26
  }
@@ -155,11 +43,8 @@ static unique_ptr<FunctionData> MapFromEntriesBind(ClientContext &context, Scala
155
43
  if (children.size() != 2) {
156
44
  throw InvalidInputException("The provided struct type should only contain 2 fields, a key and a value");
157
45
  }
158
- child_types.push_back(make_pair("key", LogicalType::LIST(children[0].second)));
159
- child_types.push_back(make_pair("value", LogicalType::LIST(children[1].second)));
160
46
 
161
- //! this is more for completeness reasons
162
- bound_function.return_type = LogicalType::MAP(move(child_types));
47
+ bound_function.return_type = LogicalType::MAP(elem_type);
163
48
  return make_unique<VariableReturnBindData>(bound_function.return_type);
164
49
  }
165
50
 
@@ -219,6 +219,7 @@ static unique_ptr<FunctionData> BindAggregateState(ClientContext &context, Scala
219
219
  // FIXME: this is really hacky
220
220
  // but the aggregate state export needs a rework around how it handles more complex aggregates anyway
221
221
  vector<unique_ptr<Expression>> args;
222
+ args.reserve(state_type.bound_argument_types.size());
222
223
  for (auto &arg_type : state_type.bound_argument_types) {
223
224
  args.push_back(make_unique<BoundConstantExpression>(Value(arg_type)));
224
225
  }
@@ -302,7 +303,6 @@ ExportAggregateFunction::Bind(unique_ptr<BoundAggregateExpression> child_aggrega
302
303
  // this should be required
303
304
  D_ASSERT(bound_function.state_size);
304
305
  D_ASSERT(bound_function.finalize);
305
- D_ASSERT(!bound_function.window);
306
306
 
307
307
  D_ASSERT(child_aggregate->function.return_type.id() != LogicalTypeId::INVALID);
308
308
  #ifdef DEBUG
@@ -124,17 +124,13 @@ LogicalType ArrowTableFunction::GetArrowLogicalType(
124
124
  return LogicalType::STRUCT(move(child_types));
125
125
 
126
126
  } else if (format == "+m") {
127
- child_list_t<LogicalType> child_types;
128
- //! First type will be struct, so we skip it
129
- auto &struct_schema = *schema.children[0];
130
- for (idx_t type_idx = 0; type_idx < (idx_t)struct_schema.n_children; type_idx++) {
131
- //! The other types must be added on lists
132
- auto child_type = GetArrowLogicalType(*struct_schema.children[type_idx], arrow_convert_data, col_idx);
127
+ arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
133
128
 
134
- auto list_type = LogicalType::LIST(child_type);
135
- child_types.push_back({struct_schema.children[type_idx]->name, list_type});
136
- }
137
- return LogicalType::MAP(move(child_types));
129
+ auto &arrow_struct_type = *schema.children[0];
130
+ D_ASSERT(arrow_struct_type.n_children == 2);
131
+ auto key_type = GetArrowLogicalType(*arrow_struct_type.children[0], arrow_convert_data, col_idx);
132
+ auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1], arrow_convert_data, col_idx);
133
+ return LogicalType::MAP(key_type, value_type);
138
134
  } else if (format == "z") {
139
135
  arrow_convert_data[col_idx]->variable_sz_type.emplace_back(ArrowVariableSizeType::NORMAL, 0);
140
136
  return LogicalType::BLOB;
@@ -227,47 +227,6 @@ void ArrowToDuckDBMapVerify(Vector &vector, idx_t count) {
227
227
  }
228
228
  }
229
229
 
230
- void ArrowToDuckDBMapList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
231
- unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data, idx_t col_idx,
232
- pair<idx_t, idx_t> &arrow_convert_idx, uint32_t *offsets, ValidityMask *parent_mask) {
233
- idx_t list_size = offsets[size] - offsets[0];
234
- ListVector::Reserve(vector, list_size);
235
-
236
- auto &child_vector = ListVector::GetEntry(vector);
237
- auto list_data = FlatVector::GetData<list_entry_t>(vector);
238
- auto cur_offset = 0;
239
- for (idx_t i = 0; i < size; i++) {
240
- auto &le = list_data[i];
241
- le.offset = cur_offset;
242
- le.length = offsets[i + 1] - offsets[i];
243
- cur_offset += le.length;
244
- }
245
- ListVector::SetListSize(vector, list_size);
246
- if (list_size == 0 && offsets[0] == 0) {
247
- SetValidityMask(child_vector, array, scan_state, list_size, -1);
248
- } else {
249
- SetValidityMask(child_vector, array, scan_state, list_size, offsets[0]);
250
- }
251
-
252
- auto &list_mask = FlatVector::Validity(vector);
253
- if (parent_mask) {
254
- //! Since this List is owned by a struct we must guarantee their validity map matches on Null
255
- if (!parent_mask->AllValid()) {
256
- for (idx_t i = 0; i < size; i++) {
257
- if (!parent_mask->RowIsValid(i)) {
258
- list_mask.SetInvalid(i);
259
- }
260
- }
261
- }
262
- }
263
- if (list_size == 0 && offsets[0] == 0) {
264
- ColumnArrowToDuckDB(child_vector, array, scan_state, list_size, arrow_convert_data, col_idx, arrow_convert_idx,
265
- -1);
266
- } else {
267
- ColumnArrowToDuckDB(child_vector, array, scan_state, list_size, arrow_convert_data, col_idx, arrow_convert_idx,
268
- offsets[0]);
269
- }
270
- }
271
230
  template <class T>
272
231
  static void SetVectorString(Vector &vector, idx_t size, char *cdata, T *offsets) {
273
232
  auto strings = FlatVector::GetData<string_t>(vector);
@@ -619,20 +578,8 @@ void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState
619
578
  break;
620
579
  }
621
580
  case LogicalTypeId::MAP: {
622
- //! Since this is a map we skip first child, because its a struct
623
- auto &struct_arrow = *array.children[0];
624
- auto &child_entries = StructVector::GetEntries(vector);
625
- D_ASSERT(child_entries.size() == 2);
626
- auto offsets = (uint32_t *)array.buffers[1] + array.offset + scan_state.chunk_offset;
627
- if (nested_offset != -1) {
628
- offsets = (uint32_t *)array.buffers[1] + nested_offset;
629
- }
630
- auto &struct_validity_mask = FlatVector::Validity(vector);
631
- //! Fill the children
632
- for (idx_t type_idx = 0; type_idx < (idx_t)struct_arrow.n_children; type_idx++) {
633
- ArrowToDuckDBMapList(*child_entries[type_idx], *struct_arrow.children[type_idx], scan_state, size,
634
- arrow_convert_data, col_idx, arrow_convert_idx, offsets, &struct_validity_mask);
635
- }
581
+ ArrowToDuckDBList(vector, array, scan_state, size, arrow_convert_data, col_idx, arrow_convert_idx,
582
+ nested_offset, parent_mask);
636
583
  ArrowToDuckDBMapVerify(vector, size);
637
584
  break;
638
585
  }