duckdb 0.6.2-dev2115.0 → 0.6.2-dev2226.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries and as they appear in their respective public registries. The information contained in this diff is provided for informational purposes only.
Files changed (85)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -5
  3. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +6 -1
  4. package/src/duckdb/extension/json/include/json_common.hpp +1 -0
  5. package/src/duckdb/extension/json/include/json_scan.hpp +7 -0
  6. package/src/duckdb/extension/json/include/json_transform.hpp +25 -10
  7. package/src/duckdb/extension/json/json_common.cpp +6 -2
  8. package/src/duckdb/extension/json/json_functions/json_structure.cpp +47 -9
  9. package/src/duckdb/extension/json/json_functions/json_transform.cpp +183 -106
  10. package/src/duckdb/extension/json/json_functions/read_json.cpp +35 -22
  11. package/src/duckdb/extension/json/json_scan.cpp +26 -5
  12. package/src/duckdb/extension/parquet/parquet-extension.cpp +1 -0
  13. package/src/duckdb/src/catalog/catalog.cpp +11 -12
  14. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  15. package/src/duckdb/src/common/box_renderer.cpp +9 -1
  16. package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
  17. package/src/duckdb/src/common/enums/relation_type.cpp +2 -0
  18. package/src/duckdb/src/common/gzip_file_system.cpp +1 -1
  19. package/src/duckdb/src/common/local_file_system.cpp +1 -1
  20. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +2 -2
  21. package/src/duckdb/src/common/types/column_data_allocator.cpp +2 -2
  22. package/src/duckdb/src/common/types/date.cpp +7 -2
  23. package/src/duckdb/src/common/types/vector.cpp +3 -2
  24. package/src/duckdb/src/common/virtual_file_system.cpp +1 -1
  25. package/src/duckdb/src/execution/index/art/art.cpp +5 -5
  26. package/src/duckdb/src/execution/join_hashtable.cpp +4 -5
  27. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +2 -0
  28. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +182 -123
  29. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +22 -18
  30. package/src/duckdb/src/execution/physical_plan/plan_create_table.cpp +1 -1
  31. package/src/duckdb/src/function/aggregate/distributive/arg_min_max.cpp +2 -3
  32. package/src/duckdb/src/function/scalar/math/setseed.cpp +1 -1
  33. package/src/duckdb/src/function/scalar/string/substring.cpp +8 -0
  34. package/src/duckdb/src/function/table/read_csv.cpp +1 -1
  35. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  36. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
  37. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +4 -0
  38. package/src/duckdb/src/include/duckdb/common/enums/relation_type.hpp +1 -0
  39. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
  40. package/src/duckdb/src/include/duckdb/common/http_stats.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/common/limits.hpp +3 -0
  42. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +1 -9
  43. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +2 -2
  44. package/src/duckdb/src/include/duckdb/execution/executor.hpp +3 -0
  45. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +3 -3
  46. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_unnest.hpp +5 -1
  47. package/src/duckdb/src/include/duckdb/main/client_context.hpp +3 -0
  48. package/src/duckdb/src/include/duckdb/main/config.hpp +0 -4
  49. package/src/duckdb/src/include/duckdb/main/database.hpp +6 -0
  50. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
  51. package/src/duckdb/src/include/duckdb/main/relation/write_csv_relation.hpp +2 -1
  52. package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +34 -0
  53. package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -1
  54. package/src/duckdb/src/include/duckdb/parser/parsed_data/copy_info.hpp +2 -1
  55. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
  56. package/src/duckdb/src/include/duckdb/planner/binder.hpp +1 -1
  57. package/src/duckdb/src/include/duckdb/storage/index.hpp +4 -3
  58. package/src/duckdb/src/include/duckdb.h +7 -0
  59. package/src/duckdb/src/main/capi/threading-c.cpp +8 -0
  60. package/src/duckdb/src/main/client_context.cpp +7 -0
  61. package/src/duckdb/src/main/client_context_file_opener.cpp +14 -0
  62. package/src/duckdb/src/main/database.cpp +57 -40
  63. package/src/duckdb/src/main/extension/extension_load.cpp +20 -28
  64. package/src/duckdb/src/main/relation/write_csv_relation.cpp +4 -2
  65. package/src/duckdb/src/main/relation/write_parquet_relation.cpp +37 -0
  66. package/src/duckdb/src/main/relation.cpp +12 -2
  67. package/src/duckdb/src/parallel/executor.cpp +4 -0
  68. package/src/duckdb/src/parser/statement/copy_statement.cpp +1 -1
  69. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +4 -3
  70. package/src/duckdb/src/planner/binder/expression/bind_cast_expression.cpp +1 -1
  71. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +24 -3
  72. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -1
  73. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +2 -0
  74. package/src/duckdb/src/storage/compression/bitpacking.cpp +2 -1
  75. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +1 -1
  76. package/src/duckdb/src/storage/index.cpp +1 -1
  77. package/src/duckdb/src/storage/meta_block_writer.cpp +1 -1
  78. package/src/duckdb/src/storage/table/column_segment.cpp +3 -3
  79. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +1 -2
  80. package/src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp +539 -300
  81. package/src/duckdb/ub_src_main.cpp +0 -2
  82. package/src/duckdb/ub_src_main_relation.cpp +2 -0
  83. package/src/duckdb/src/include/duckdb/function/replacement_open.hpp +0 -54
  84. package/src/duckdb/src/include/duckdb/main/replacement_opens.hpp +0 -20
  85. package/src/duckdb/src/main/extension_prefix_opener.cpp +0 -55
@@ -11,7 +11,11 @@ namespace duckdb {
11
11
  class UnnestOperatorState : public OperatorState {
12
12
  public:
13
13
  UnnestOperatorState(ClientContext &context, const vector<unique_ptr<Expression>> &select_list)
14
- : parent_position(0), list_position(0), list_length(-1), first_fetch(true), executor(context) {
14
+ : current_row(0), list_position(0), longest_list_length(DConstants::INVALID_INDEX), first_fetch(true),
15
+ executor(context) {
16
+
17
+ // for each UNNEST in the select_list, we add the child expression to the expression executor
18
+ // and set the return type in the list_data chunk, which will contain the evaluated expression results
15
19
  vector<LogicalType> list_data_types;
16
20
  for (auto &exp : select_list) {
17
21
  D_ASSERT(exp->type == ExpressionType::BOUND_UNNEST);
@@ -19,6 +23,7 @@ public:
19
23
  list_data_types.push_back(bue->child->return_type);
20
24
  executor.AddExpression(*bue->child.get());
21
25
  }
26
+
22
27
  auto &allocator = Allocator::Get(context);
23
28
  list_data.Initialize(allocator, list_data_types);
24
29
 
@@ -26,18 +31,50 @@ public:
26
31
  list_child_data.resize(list_data.ColumnCount());
27
32
  }
28
33
 
29
- idx_t parent_position;
34
+ idx_t current_row;
30
35
  idx_t list_position;
31
- int64_t list_length;
36
+ idx_t longest_list_length;
32
37
  bool first_fetch;
33
38
 
34
39
  ExpressionExecutor executor;
35
40
  DataChunk list_data;
36
41
  vector<UnifiedVectorFormat> list_vector_data;
37
42
  vector<UnifiedVectorFormat> list_child_data;
43
+
44
+ public:
45
+ //! Reset the fields of the unnest operator state
46
+ void Reset();
47
+ //! Set the longest list's length for the current row
48
+ void SetLongestListLength();
38
49
  };
39
50
 
40
- // this implements a sorted window functions variant
51
+ void UnnestOperatorState::Reset() {
52
+ current_row = 0;
53
+ list_position = 0;
54
+ longest_list_length = DConstants::INVALID_INDEX;
55
+ first_fetch = true;
56
+ }
57
+
58
+ void UnnestOperatorState::SetLongestListLength() {
59
+
60
+ longest_list_length = 0;
61
+ for (idx_t col_idx = 0; col_idx < list_data.ColumnCount(); col_idx++) {
62
+
63
+ auto &vector_data = list_vector_data[col_idx];
64
+ auto current_idx = vector_data.sel->get_index(current_row);
65
+
66
+ if (vector_data.validity.RowIsValid(current_idx)) {
67
+
68
+ // check if this list is longer
69
+ auto list_data = (list_entry_t *)vector_data.data;
70
+ auto list_entry = list_data[current_idx];
71
+ if (list_entry.length > longest_list_length) {
72
+ longest_list_length = list_entry.length;
73
+ }
74
+ }
75
+ }
76
+ }
77
+
41
78
  PhysicalUnnest::PhysicalUnnest(vector<LogicalType> types, vector<unique_ptr<Expression>> select_list,
42
79
  idx_t estimated_cardinality, PhysicalOperatorType type)
43
80
  : PhysicalOperator(type, std::move(types), estimated_cardinality), select_list(std::move(select_list)) {
@@ -45,6 +82,8 @@ PhysicalUnnest::PhysicalUnnest(vector<LogicalType> types, vector<unique_ptr<Expr
45
82
  }
46
83
 
47
84
  static void UnnestNull(idx_t start, idx_t end, Vector &result) {
85
+
86
+ D_ASSERT(result.GetVectorType() == VectorType::FLAT_VECTOR);
48
87
  auto &validity = FlatVector::Validity(result);
49
88
  for (idx_t i = start; i < end; i++) {
50
89
  validity.SetInvalid(i);
@@ -58,14 +97,17 @@ static void UnnestNull(idx_t start, idx_t end, Vector &result) {
58
97
  }
59
98
 
60
99
  template <class T>
61
- static void TemplatedUnnest(UnifiedVectorFormat &vdata, idx_t start, idx_t end, Vector &result) {
62
- auto source_data = (T *)vdata.data;
63
- auto &source_mask = vdata.validity;
100
+ static void TemplatedUnnest(UnifiedVectorFormat &vector_data, idx_t start, idx_t end, Vector &result) {
101
+
102
+ auto source_data = (T *)vector_data.data;
103
+ auto &source_mask = vector_data.validity;
104
+
105
+ D_ASSERT(result.GetVectorType() == VectorType::FLAT_VECTOR);
64
106
  auto result_data = FlatVector::GetData<T>(result);
65
107
  auto &result_mask = FlatVector::Validity(result);
66
108
 
67
109
  for (idx_t i = start; i < end; i++) {
68
- auto source_idx = vdata.sel->get_index(i);
110
+ auto source_idx = vector_data.sel->get_index(i);
69
111
  auto target_idx = i - start;
70
112
  if (source_mask.RowIsValid(source_idx)) {
71
113
  result_data[target_idx] = source_data[source_idx];
@@ -76,84 +118,131 @@ static void TemplatedUnnest(UnifiedVectorFormat &vdata, idx_t start, idx_t end,
76
118
  }
77
119
  }
78
120
 
79
- static void UnnestValidity(UnifiedVectorFormat &vdata, idx_t start, idx_t end, Vector &result) {
80
- auto &source_mask = vdata.validity;
121
+ static void UnnestValidity(UnifiedVectorFormat &vector_data, idx_t start, idx_t end, Vector &result) {
122
+
123
+ auto &source_mask = vector_data.validity;
124
+ D_ASSERT(result.GetVectorType() == VectorType::FLAT_VECTOR);
81
125
  auto &result_mask = FlatVector::Validity(result);
82
126
 
83
127
  for (idx_t i = start; i < end; i++) {
84
- auto source_idx = vdata.sel->get_index(i);
128
+ auto source_idx = vector_data.sel->get_index(i);
85
129
  auto target_idx = i - start;
86
130
  result_mask.Set(target_idx, source_mask.RowIsValid(source_idx));
87
131
  }
88
132
  }
89
133
 
90
- static void UnnestVector(UnifiedVectorFormat &vdata, Vector &source, idx_t list_size, idx_t start, idx_t end,
91
- Vector &result) {
92
- D_ASSERT(source.GetType() == result.GetType());
134
+ static void UnnestVector(UnifiedVectorFormat &child_vector_data, Vector &child_vector, idx_t list_size, idx_t start,
135
+ idx_t end, Vector &result) {
136
+
137
+ D_ASSERT(child_vector.GetType() == result.GetType());
93
138
  switch (result.GetType().InternalType()) {
94
139
  case PhysicalType::BOOL:
95
140
  case PhysicalType::INT8:
96
- TemplatedUnnest<int8_t>(vdata, start, end, result);
141
+ TemplatedUnnest<int8_t>(child_vector_data, start, end, result);
97
142
  break;
98
143
  case PhysicalType::INT16:
99
- TemplatedUnnest<int16_t>(vdata, start, end, result);
144
+ TemplatedUnnest<int16_t>(child_vector_data, start, end, result);
100
145
  break;
101
146
  case PhysicalType::INT32:
102
- TemplatedUnnest<int32_t>(vdata, start, end, result);
147
+ TemplatedUnnest<int32_t>(child_vector_data, start, end, result);
103
148
  break;
104
149
  case PhysicalType::INT64:
105
- TemplatedUnnest<int64_t>(vdata, start, end, result);
150
+ TemplatedUnnest<int64_t>(child_vector_data, start, end, result);
106
151
  break;
107
152
  case PhysicalType::INT128:
108
- TemplatedUnnest<hugeint_t>(vdata, start, end, result);
153
+ TemplatedUnnest<hugeint_t>(child_vector_data, start, end, result);
109
154
  break;
110
155
  case PhysicalType::UINT8:
111
- TemplatedUnnest<uint8_t>(vdata, start, end, result);
156
+ TemplatedUnnest<uint8_t>(child_vector_data, start, end, result);
112
157
  break;
113
158
  case PhysicalType::UINT16:
114
- TemplatedUnnest<uint16_t>(vdata, start, end, result);
159
+ TemplatedUnnest<uint16_t>(child_vector_data, start, end, result);
115
160
  break;
116
161
  case PhysicalType::UINT32:
117
- TemplatedUnnest<uint32_t>(vdata, start, end, result);
162
+ TemplatedUnnest<uint32_t>(child_vector_data, start, end, result);
118
163
  break;
119
164
  case PhysicalType::UINT64:
120
- TemplatedUnnest<uint64_t>(vdata, start, end, result);
165
+ TemplatedUnnest<uint64_t>(child_vector_data, start, end, result);
121
166
  break;
122
167
  case PhysicalType::FLOAT:
123
- TemplatedUnnest<float>(vdata, start, end, result);
168
+ TemplatedUnnest<float>(child_vector_data, start, end, result);
124
169
  break;
125
170
  case PhysicalType::DOUBLE:
126
- TemplatedUnnest<double>(vdata, start, end, result);
171
+ TemplatedUnnest<double>(child_vector_data, start, end, result);
127
172
  break;
128
173
  case PhysicalType::INTERVAL:
129
- TemplatedUnnest<interval_t>(vdata, start, end, result);
174
+ TemplatedUnnest<interval_t>(child_vector_data, start, end, result);
130
175
  break;
131
176
  case PhysicalType::VARCHAR:
132
- TemplatedUnnest<string_t>(vdata, start, end, result);
177
+ TemplatedUnnest<string_t>(child_vector_data, start, end, result);
133
178
  break;
134
179
  case PhysicalType::LIST: {
180
+ // the child vector of result now references the child vector source
181
+ // FIXME: only reference relevant children (start - end) instead of all
135
182
  auto &target = ListVector::GetEntry(result);
136
- target.Reference(ListVector::GetEntry(source));
137
- ListVector::SetListSize(result, ListVector::GetListSize(source));
138
- TemplatedUnnest<list_entry_t>(vdata, start, end, result);
183
+ target.Reference(ListVector::GetEntry(child_vector));
184
+ ListVector::SetListSize(result, ListVector::GetListSize(child_vector));
185
+ // unnest
186
+ TemplatedUnnest<list_entry_t>(child_vector_data, start, end, result);
139
187
  break;
140
188
  }
141
189
  case PhysicalType::STRUCT: {
142
- auto &source_entries = StructVector::GetEntries(source);
143
- auto &target_entries = StructVector::GetEntries(result);
144
- UnnestValidity(vdata, start, end, result);
145
- for (idx_t i = 0; i < source_entries.size(); i++) {
146
- UnifiedVectorFormat sdata;
147
- source_entries[i]->ToUnifiedFormat(list_size, sdata);
148
- UnnestVector(sdata, *source_entries[i], list_size, start, end, *target_entries[i]);
190
+ auto &child_vector_entries = StructVector::GetEntries(child_vector);
191
+ auto &result_entries = StructVector::GetEntries(result);
192
+
193
+ // set the validity mask for the 'outer' struct vector before unnesting its children
194
+ UnnestValidity(child_vector_data, start, end, result);
195
+
196
+ for (idx_t i = 0; i < child_vector_entries.size(); i++) {
197
+ UnifiedVectorFormat child_vector_entries_data;
198
+ child_vector_entries[i]->ToUnifiedFormat(list_size, child_vector_entries_data);
199
+ UnnestVector(child_vector_entries_data, *child_vector_entries[i], list_size, start, end,
200
+ *result_entries[i]);
149
201
  }
150
202
  break;
151
203
  }
152
204
  default:
153
- throw InternalException("Unimplemented type for UNNEST");
205
+ throw InternalException("Unimplemented type for UNNEST.");
154
206
  }
155
207
  }
156
208
 
209
+ static void PrepareInput(UnnestOperatorState &state, DataChunk &input,
210
+ const vector<unique_ptr<Expression>> &select_list) {
211
+
212
+ state.list_data.Reset();
213
+ // execute the expressions inside each UNNEST in the select_list to get the list data
214
+ // execution results (lists) are kept in state.list_data chunk
215
+ state.executor.Execute(input, state.list_data);
216
+
217
+ // verify incoming lists
218
+ state.list_data.Verify();
219
+ D_ASSERT(input.size() == state.list_data.size());
220
+ D_ASSERT(state.list_data.ColumnCount() == select_list.size());
221
+ D_ASSERT(state.list_vector_data.size() == state.list_data.ColumnCount());
222
+ D_ASSERT(state.list_child_data.size() == state.list_data.ColumnCount());
223
+
224
+ // get the UnifiedVectorFormat of each list_data vector (LIST vectors for the different UNNESTs)
225
+ // both for the vector itself and its child vector
226
+ for (idx_t col_idx = 0; col_idx < state.list_data.ColumnCount(); col_idx++) {
227
+
228
+ auto &list_vector = state.list_data.data[col_idx];
229
+ list_vector.ToUnifiedFormat(state.list_data.size(), state.list_vector_data[col_idx]);
230
+
231
+ if (list_vector.GetType() == LogicalType::SQLNULL) {
232
+ // UNNEST(NULL): SQLNULL vectors don't have child vectors, but we need to point to the child vector of
233
+ // each vector, so we just get the UnifiedVectorFormat of the vector itself
234
+ auto &child_vector = list_vector;
235
+ child_vector.ToUnifiedFormat(0, state.list_child_data[col_idx]);
236
+ } else {
237
+ auto list_size = ListVector::GetListSize(list_vector);
238
+ auto &child_vector = ListVector::GetEntry(list_vector);
239
+ child_vector.ToUnifiedFormat(list_size, state.list_child_data[col_idx]);
240
+ }
241
+ }
242
+
243
+ state.first_fetch = false;
244
+ }
245
+
157
246
  unique_ptr<OperatorState> PhysicalUnnest::GetOperatorState(ExecutionContext &context) const {
158
247
  return PhysicalUnnest::GetState(context, select_list);
159
248
  }
@@ -167,137 +256,107 @@ OperatorResultType PhysicalUnnest::ExecuteInternal(ExecutionContext &context, Da
167
256
  OperatorState &state_p,
168
257
  const vector<unique_ptr<Expression>> &select_list,
169
258
  bool include_input) {
259
+
170
260
  auto &state = (UnnestOperatorState &)state_p;
261
+
171
262
  do {
263
+ // prepare the input data by executing any expressions and getting the
264
+ // UnifiedVectorFormat of each LIST vector (list_vector_data) and its child vector (list_child_data)
172
265
  if (state.first_fetch) {
173
- // get the list data to unnest
174
- state.list_data.Reset();
175
- state.executor.Execute(input, state.list_data);
176
-
177
- // paranoia aplenty
178
- state.list_data.Verify();
179
- D_ASSERT(input.size() == state.list_data.size());
180
- D_ASSERT(state.list_data.ColumnCount() == select_list.size());
181
- D_ASSERT(state.list_vector_data.size() == state.list_data.ColumnCount());
182
- D_ASSERT(state.list_child_data.size() == state.list_data.ColumnCount());
183
-
184
- // initialize UnifiedVectorFormat object so the nullmask can accessed
185
- for (idx_t col_idx = 0; col_idx < state.list_data.ColumnCount(); col_idx++) {
186
- auto &list_vector = state.list_data.data[col_idx];
187
- list_vector.ToUnifiedFormat(state.list_data.size(), state.list_vector_data[col_idx]);
188
-
189
- if (list_vector.GetType() == LogicalType::SQLNULL) {
190
- // UNNEST(NULL)
191
- auto &child_vector = list_vector;
192
- child_vector.ToUnifiedFormat(0, state.list_child_data[col_idx]);
193
- } else {
194
- auto list_size = ListVector::GetListSize(list_vector);
195
- auto &child_vector = ListVector::GetEntry(list_vector);
196
- child_vector.ToUnifiedFormat(list_size, state.list_child_data[col_idx]);
197
- }
198
- }
199
- state.first_fetch = false;
266
+ PrepareInput(state, input, select_list);
200
267
  }
201
- if (state.parent_position >= input.size()) {
202
- // finished with this input chunk
203
- state.parent_position = 0;
204
- state.list_position = 0;
205
- state.list_length = -1;
206
- state.first_fetch = true;
268
+
269
+ // finished with all rows of this input chunk, reset
270
+ if (state.current_row >= input.size()) {
271
+ state.Reset();
207
272
  return OperatorResultType::NEED_MORE_INPUT;
208
273
  }
209
274
 
210
- // need to figure out how many times we need to repeat for current row
211
- if (state.list_length < 0) {
212
- for (idx_t col_idx = 0; col_idx < state.list_data.ColumnCount(); col_idx++) {
213
- auto &vdata = state.list_vector_data[col_idx];
214
- auto current_idx = vdata.sel->get_index(state.parent_position);
215
-
216
- int64_t list_length;
217
- // deal with NULL values
218
- if (!vdata.validity.RowIsValid(current_idx)) {
219
- list_length = 0;
220
- } else {
221
- auto list_data = (list_entry_t *)vdata.data;
222
- auto list_entry = list_data[current_idx];
223
- list_length = (int64_t)list_entry.length;
224
- }
225
-
226
- if (list_length > state.list_length) {
227
- state.list_length = list_length;
228
- }
229
- }
275
+ // each UNNEST in the select_list contains a list (or NULL) for this row, find longest list
276
+ // because this length determines how many times we need to repeat for the current row
277
+ if (state.longest_list_length == DConstants::INVALID_INDEX) {
278
+ state.SetLongestListLength();
230
279
  }
280
+ D_ASSERT(state.longest_list_length != DConstants::INVALID_INDEX);
231
281
 
232
- D_ASSERT(state.list_length >= 0);
233
-
234
- auto this_chunk_len = MinValue<idx_t>(STANDARD_VECTOR_SIZE, state.list_length - state.list_position);
235
-
236
- // first cols are from child, last n cols from unnest
282
+ // we emit chunks of either STANDARD_VECTOR_SIZE or smaller
283
+ auto this_chunk_len = MinValue<idx_t>(STANDARD_VECTOR_SIZE, state.longest_list_length - state.list_position);
237
284
  chunk.SetCardinality(this_chunk_len);
238
285
 
239
- idx_t output_offset = 0;
286
+ // if we include other projection input columns, e.g. SELECT 1, UNNEST([1, 2]);, then
287
+ // we need to add them as a constant vector to the resulting chunk
288
+ // FIXME: emit multiple unnested rows. Currently, we never emit a chunk containing multiple unnested input rows,
289
+ // so setting a constant vector for the value at state.current_row is fine
290
+ idx_t col_offset = 0;
240
291
  if (include_input) {
241
292
  for (idx_t col_idx = 0; col_idx < input.ColumnCount(); col_idx++) {
242
- ConstantVector::Reference(chunk.data[col_idx], input.data[col_idx], state.parent_position,
243
- input.size());
293
+ ConstantVector::Reference(chunk.data[col_idx], input.data[col_idx], state.current_row, input.size());
244
294
  }
245
- output_offset = input.ColumnCount();
295
+ col_offset = input.ColumnCount();
246
296
  }
247
297
 
298
+ // unnest the lists
248
299
  for (idx_t col_idx = 0; col_idx < state.list_data.ColumnCount(); col_idx++) {
249
- auto &result_vector = chunk.data[col_idx + output_offset];
300
+
301
+ auto &result_vector = chunk.data[col_idx + col_offset];
250
302
 
251
303
  if (state.list_data.data[col_idx].GetType() == LogicalType::SQLNULL) {
252
304
  // UNNEST(NULL)
253
305
  chunk.SetCardinality(0);
306
+ break;
307
+
254
308
  } else {
255
- auto &vdata = state.list_vector_data[col_idx];
256
- auto &child_data = state.list_child_data[col_idx];
257
- auto current_idx = vdata.sel->get_index(state.parent_position);
258
309
 
259
- auto list_data = (list_entry_t *)vdata.data;
260
- auto list_entry = list_data[current_idx];
310
+ auto &vector_data = state.list_vector_data[col_idx];
311
+ auto current_idx = vector_data.sel->get_index(state.current_row);
312
+
313
+ if (!vector_data.validity.RowIsValid(current_idx)) {
314
+ UnnestNull(0, this_chunk_len, result_vector);
261
315
 
262
- idx_t list_count;
263
- if (state.list_position >= list_entry.length) {
264
- list_count = 0;
265
316
  } else {
266
- list_count = MinValue<idx_t>(this_chunk_len, list_entry.length - state.list_position);
267
- }
268
317
 
269
- if (list_entry.length > state.list_position) {
270
- if (!vdata.validity.RowIsValid(current_idx)) {
271
- UnnestNull(0, list_count, result_vector);
272
- } else {
318
+ auto list_data = (list_entry_t *)vector_data.data;
319
+ auto list_entry = list_data[current_idx];
320
+
321
+ idx_t list_count = 0;
322
+ if (state.list_position < list_entry.length) {
323
+ // there are still list_count elements to unnest
324
+ list_count = MinValue<idx_t>(this_chunk_len, list_entry.length - state.list_position);
325
+
273
326
  auto &list_vector = state.list_data.data[col_idx];
274
327
  auto &child_vector = ListVector::GetEntry(list_vector);
275
328
  auto list_size = ListVector::GetListSize(list_vector);
329
+ auto &child_vector_data = state.list_child_data[col_idx];
276
330
 
277
331
  auto base_offset = list_entry.offset + state.list_position;
278
- UnnestVector(child_data, child_vector, list_size, base_offset, base_offset + list_count,
332
+ UnnestVector(child_vector_data, child_vector, list_size, base_offset, base_offset + list_count,
279
333
  result_vector);
280
334
  }
281
- }
282
335
 
283
- UnnestNull(list_count, this_chunk_len, result_vector);
336
+ // fill the rest with NULLs
337
+ if (list_count != this_chunk_len) {
338
+ UnnestNull(list_count, this_chunk_len, result_vector);
339
+ }
340
+ }
284
341
  }
285
342
  }
286
343
 
344
+ chunk.Verify();
345
+
287
346
  state.list_position += this_chunk_len;
288
- if ((int64_t)state.list_position == state.list_length) {
289
- state.parent_position++;
290
- state.list_length = -1;
347
+ if (state.list_position == state.longest_list_length) {
348
+ state.current_row++;
349
+ state.longest_list_length = DConstants::INVALID_INDEX;
291
350
  state.list_position = 0;
292
351
  }
293
352
 
294
- chunk.Verify();
353
+ // we only emit one unnested row (that contains data) at a time
295
354
  } while (chunk.size() == 0);
296
355
  return OperatorResultType::HAVE_MORE_OUTPUT;
297
356
  }
298
357
 
299
358
  OperatorResultType PhysicalUnnest::Execute(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
300
- GlobalOperatorState &gstate, OperatorState &state) const {
359
+ GlobalOperatorState &, OperatorState &state) const {
301
360
  return ExecuteInternal(context, input, chunk, state, select_list);
302
361
  }
303
362
 
@@ -5,6 +5,7 @@
5
5
  #include "duckdb/main/attached_database.hpp"
6
6
  #include "duckdb/main/database.hpp"
7
7
  #include "duckdb/storage/storage_extension.hpp"
8
+ #include "duckdb/main/extension_helper.hpp"
8
9
 
9
10
  namespace duckdb {
10
11
 
@@ -55,11 +56,29 @@ void PhysicalAttach::GetData(ExecutionContext &context, DataChunk &chunk, Global
55
56
  unrecognized_option = entry.first;
56
57
  }
57
58
  }
59
+ auto &db = DatabaseInstance::GetDatabase(context.client);
60
+ if (type.empty()) {
61
+ // try to extract type from path
62
+ type = db.ExtractDatabaseType(info->path);
63
+ }
64
+ if (!type.empty()) {
65
+ type = ExtensionHelper::ApplyExtensionAlias(type);
66
+ }
67
+ if (type.empty() && !unrecognized_option.empty()) {
68
+ throw BinderException("Unrecognized option for attach \"%s\"", unrecognized_option);
69
+ }
70
+
71
+ // if we are loading a database type from an extension - check if that extension is loaded
72
+ if (!type.empty()) {
73
+ if (!db.ExtensionIsLoaded(type)) {
74
+ ExtensionHelper::LoadExternalExtension(context.client, type);
75
+ }
76
+ }
58
77
 
59
78
  // attach the database
60
- auto name = info->name;
79
+ auto &name = info->name;
61
80
  const auto &path = info->path;
62
- auto &db = DatabaseInstance::GetDatabase(context.client);
81
+
63
82
  if (name.empty()) {
64
83
  name = AttachedDatabase::ExtractDatabaseName(path);
65
84
  }
@@ -68,22 +87,7 @@ void PhysicalAttach::GetData(ExecutionContext &context, DataChunk &chunk, Global
68
87
  if (existing_db) {
69
88
  throw BinderException("Database \"%s\" is already attached with alias \"%s\"", path, existing_db->GetName());
70
89
  }
71
-
72
- unique_ptr<AttachedDatabase> new_db;
73
- if (type.empty()) {
74
- if (!unrecognized_option.empty()) {
75
- throw BinderException("Unrecognized option for attach \"%s\"", unrecognized_option);
76
- }
77
- new_db = make_unique<AttachedDatabase>(db, Catalog::GetSystemCatalog(db), name, path, access_mode);
78
- } else {
79
- // attach an extension database
80
- auto entry = config.storage_extensions.find(type);
81
- if (entry == config.storage_extensions.end()) {
82
- throw BinderException("Unrecognized storage type \"%s\"", type);
83
- }
84
- new_db =
85
- make_unique<AttachedDatabase>(db, Catalog::GetSystemCatalog(db), *entry->second, name, *info, access_mode);
86
- }
90
+ auto new_db = db.CreateAttachedDatabase(*info, type, access_mode);
87
91
  new_db->Initialize();
88
92
 
89
93
  db_manager.AddDatabase(context.client, std::move(new_db));
@@ -34,7 +34,7 @@ unique_ptr<PhysicalOperator> DuckCatalog::PlanCreateTableAs(ClientContext &conte
34
34
  }
35
35
 
36
36
  unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCreateTable &op) {
37
- auto &create_info = (CreateTableInfo &)*op.info->base;
37
+ const auto &create_info = (CreateTableInfo &)*op.info->base;
38
38
  auto &catalog = *op.info->schema->catalog;
39
39
  auto existing_entry = catalog.GetEntry<TableCatalogEntry>(context, create_info.schema, create_info.table, true);
40
40
  bool replace = op.info->Base().on_conflict == OnCreateConflict::REPLACE_ON_CONFLICT;
@@ -183,7 +183,6 @@ struct VectorArgMinMaxBase : ArgMinMaxBase<COMPARATOR> {
183
183
 
184
184
  auto states = (STATE **)sdata.data;
185
185
  for (idx_t i = 0; i < count; i++) {
186
- const auto aidx = adata.sel->get_index(i);
187
186
  const auto bidx = bdata.sel->get_index(i);
188
187
  if (!bdata.validity.RowIsValid(bidx)) {
189
188
  continue;
@@ -194,12 +193,12 @@ struct VectorArgMinMaxBase : ArgMinMaxBase<COMPARATOR> {
194
193
  auto state = states[sidx];
195
194
  if (!state->is_initialized) {
196
195
  STATE::template AssignValue<BY_TYPE>(state->value, bval, false);
197
- AssignVector(state, arg, aidx);
196
+ AssignVector(state, arg, i);
198
197
  state->is_initialized = true;
199
198
 
200
199
  } else if (COMPARATOR::template Operation<BY_TYPE>(bval, state->value)) {
201
200
  STATE::template AssignValue<BY_TYPE>(state->value, bval, true);
202
- AssignVector(state, arg, aidx);
201
+ AssignVector(state, arg, i);
203
202
  }
204
203
  }
205
204
  }
@@ -36,7 +36,7 @@ static void SetSeedFunction(DataChunk &args, ExpressionState &state, Vector &res
36
36
 
37
37
  auto &random_engine = RandomEngine::Get(info.context);
38
38
  for (idx_t i = 0; i < args.size(); i++) {
39
- if (input_seeds[i] < -1.0 || input_seeds[i] > 1.0) {
39
+ if (input_seeds[i] < -1.0 || input_seeds[i] > 1.0 || Value::IsNan(input_seeds[i])) {
40
40
  throw Exception("SETSEED accepts seed values between -1.0 and 1.0, inclusive");
41
41
  }
42
42
  uint32_t norm_seed = (input_seeds[i] + 1.0) * half_max;
@@ -7,6 +7,7 @@
7
7
  #include "duckdb/storage/statistics/string_statistics.hpp"
8
8
  #include "duckdb/planner/expression/bound_function_expression.hpp"
9
9
  #include "utf8proc.hpp"
10
+ #include "duckdb/common/types/blob.hpp"
10
11
 
11
12
  namespace duckdb {
12
13
 
@@ -138,6 +139,13 @@ string_t SubstringFun::SubstringUnicode(Vector &result, string_t input, int64_t
138
139
  }
139
140
  }
140
141
  }
142
+ while (!LengthFun::IsCharacter(input_data[start_pos])) {
143
+ start_pos++;
144
+ }
145
+ while (end_pos < input_size && !LengthFun::IsCharacter(input_data[end_pos])) {
146
+ end_pos++;
147
+ }
148
+
141
149
  if (end_pos == DConstants::INVALID_INDEX) {
142
150
  return SubstringEmptyString(result);
143
151
  }
@@ -257,7 +257,7 @@ public:
257
257
  first_file_size = file_size;
258
258
  bytes_read = 0;
259
259
  if (buffer_size < file_size) {
260
- bytes_per_local_state = buffer_size / MaxThreads();
260
+ bytes_per_local_state = buffer_size / ParallelCSVGlobalState::MaxThreads();
261
261
  } else {
262
262
  bytes_per_local_state = file_size / MaxThreads();
263
263
  }
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.6.2-dev2115"
2
+ #define DUCKDB_VERSION "0.6.2-dev2226"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "9480481947"
5
+ #define DUCKDB_SOURCE_ID "6e71048a0d"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -196,6 +196,8 @@ public:
196
196
  DUCKDB_API CatalogEntry *GetEntry(ClientContext &context, const string &schema, const string &name);
197
197
 
198
198
  //! Fetches a logical type from the catalog
199
+ DUCKDB_API LogicalType GetType(ClientContext &context, const string &schema, const string &names, bool if_exists);
200
+
199
201
  DUCKDB_API static LogicalType GetType(ClientContext &context, const string &catalog_name, const string &schema,
200
202
  const string &name);
201
203
 
@@ -22,7 +22,11 @@ enum class ValueRenderAlignment { LEFT, MIDDLE, RIGHT };
22
22
  struct BoxRendererConfig {
23
23
  // a max_width of 0 means we default to the terminal width
24
24
  idx_t max_width = 0;
25
+ // the maximum amount of rows to render
25
26
  idx_t max_rows = 20;
27
+ // the limit that is applied prior to rendering
28
+ // if we are rendering exactly "limit" rows then a question mark is rendered instead
29
+ idx_t limit = 0;
26
30
  // the max col width determines the maximum size of a single column
27
31
  // note that the max col width is only used if the result does not fit on the screen
28
32
  idx_t max_col_width = 20;
@@ -35,6 +35,7 @@ enum class RelationType : uint8_t {
35
35
  DELETE_RELATION,
36
36
  UPDATE_RELATION,
37
37
  WRITE_CSV_RELATION,
38
+ WRITE_PARQUET_RELATION,
38
39
  READ_CSV_RELATION,
39
40
  SUBQUERY_RELATION,
40
41
  TABLE_FUNCTION_RELATION,
@@ -25,6 +25,8 @@ public:
25
25
  virtual ClientContext *TryGetClientContext() = 0;
26
26
 
27
27
  DUCKDB_API static FileOpener *Get(ClientContext &context);
28
+ DUCKDB_API static ClientContext *TryGetClientContext(FileOpener *opener);
29
+ DUCKDB_API static bool TryGetCurrentSetting(FileOpener *opener, const string &key, Value &result);
28
30
  };
29
31
 
30
32
  } // namespace duckdb
@@ -35,7 +35,7 @@ public:
35
35
 
36
36
  //! helper function to get the HTTP
37
37
  static HTTPStats *TryGetStats(FileOpener *opener) {
38
- auto client_context = opener->TryGetClientContext();
38
+ auto client_context = FileOpener::TryGetClientContext(opener);
39
39
  if (client_context) {
40
40
  return client_context->client_data->http_stats.get();
41
41
  }