duckdb 0.8.2-dev3204.0 → 0.8.2-dev3250.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
  "name": "duckdb",
  "main": "./lib/duckdb.js",
  "types": "./lib/duckdb.d.ts",
- "version": "0.8.2-dev3204.0",
+ "version": "0.8.2-dev3250.0",
  "description": "DuckDB node.js API",
  "gypfile": true,
  "dependencies": {
@@ -663,6 +663,20 @@ Value Value::MAP(const LogicalType &child_type, vector<Value> values) {
 
  result.type_ = LogicalType::MAP(child_type);
  result.is_null = false;
+ for (auto &val : values) {
+ D_ASSERT(val.type().InternalType() == PhysicalType::STRUCT);
+ auto &children = StructValue::GetChildren(val);
+
+ // Ensure that the field containing the keys is called 'key'
+ // and that the field containing the values is called 'value'
+ // this is required to make equality checks work
+ D_ASSERT(children.size() == 2);
+ child_list_t<Value> new_children;
+ new_children.reserve(2);
+ new_children.push_back(std::make_pair("key", children[0]));
+ new_children.push_back(std::make_pair("value", children[1]));
+ val = Value::STRUCT(std::move(new_children));
+ }
  result.value_info_ = make_shared<NestedValueInfo>(std::move(values));
  return result;
  }
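
The loop added above rewrites every map entry so that its two struct fields are named "key" and "value", because Value equality compares field names as well as field values. As a rough illustration of why the renaming matters, here is a minimal standalone C++ sketch of the same normalization idea; it uses plain std::pair/std::vector stand-ins rather than DuckDB's Value and child_list_t types:

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    // Each map entry is a two-field struct; equality on such entries compares
    // field names as well as field values, so the names must be canonical.
    using Field = std::pair<std::string, std::string>;
    using Entry = std::vector<Field>;

    // Mirror of the patch: keep the payloads, force the names to "key"/"value".
    Entry NormalizeEntry(const Entry &entry) {
        assert(entry.size() == 2);
        Entry normalized;
        normalized.reserve(2);
        normalized.emplace_back("key", entry[0].second);
        normalized.emplace_back("value", entry[1].second);
        return normalized;
    }

    int main() {
        Entry a = {{"k", "1"}, {"v", "one"}};       // entry built with ad-hoc names
        Entry b = {{"key", "1"}, {"value", "one"}}; // entry built with canonical names
        assert(!(a == b));                 // same payload, but the names differ
        assert(NormalizeEntry(a) == b);    // normalization restores equality
        return 0;
    }
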
@@ -1286,6 +1286,11 @@ void Vector::UTFVerify(idx_t count) {
  void Vector::VerifyMap(Vector &vector_p, const SelectionVector &sel_p, idx_t count) {
  #ifdef DEBUG
  D_ASSERT(vector_p.GetType().id() == LogicalTypeId::MAP);
+ auto &child = ListType::GetChildType(vector_p.GetType());
+ D_ASSERT(StructType::GetChildCount(child) == 2);
+ D_ASSERT(StructType::GetChildName(child, 0) == "key");
+ D_ASSERT(StructType::GetChildName(child, 1) == "value");
+
  auto valid_check = MapVector::CheckMapValidity(vector_p, count, sel_p);
  D_ASSERT(valid_check == MapInvalidReason::VALID);
  #endif // DEBUG
@@ -1410,9 +1415,6 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
  }
  }
  }
- if (vector->GetType().id() == LogicalTypeId::MAP) {
- VerifyMap(*vector, *sel, count);
- }
 
  if (vector->GetType().id() == LogicalTypeId::UNION) {
  VerifyUnion(*vector, *sel, count);
@@ -1460,6 +1462,10 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
  }
  Vector::Verify(child, child_sel, child_count);
  }
+
+ if (vector->GetType().id() == LogicalTypeId::MAP) {
+ VerifyMap(*vector, *sel, count);
+ }
  }
  #endif
  }
@@ -927,7 +927,24 @@ LogicalType LogicalType::AGGREGATE_STATE(aggregate_state_t state_type) { // NOLI
  //===--------------------------------------------------------------------===//
  // Map Type
  //===--------------------------------------------------------------------===//
- LogicalType LogicalType::MAP(const LogicalType &child) {
+ LogicalType LogicalType::MAP(const LogicalType &child_p) {
+ D_ASSERT(child_p.id() == LogicalTypeId::STRUCT);
+ auto &children = StructType::GetChildTypes(child_p);
+ D_ASSERT(children.size() == 2);
+
+ // We do this to enforce that for every MAP created, the keys are called "key"
+ // and the values are called "value"
+
+ // This is done because for Vector the keys of the STRUCT are used in equality checks.
+ // Vector::Reference will throw if the types don't match
+ child_list_t<LogicalType> new_children(2);
+ new_children[0] = children[0];
+ new_children[0].first = "key";
+
+ new_children[1] = children[1];
+ new_children[1].first = "value";
+
+ auto child = LogicalType::STRUCT(std::move(new_children));
  auto info = make_shared<ListTypeInfo>(child);
  return LogicalType(LogicalTypeId::MAP, std::move(info));
  }
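
LogicalType::MAP now applies the same canonicalization at the type level: a MAP is physically a LIST whose child is a two-field STRUCT, and both field names are rewritten before the type is built, which is exactly the invariant the new VerifyMap assertions check. A small self-contained sketch of that invariant, using a toy type tree instead of DuckDB's LogicalType:

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    // Toy model of the type tree: a MAP is a LIST whose single child is a
    // STRUCT with exactly two fields, canonically named "key" and "value".
    struct Type {
        std::string id; // "MAP", "STRUCT", "INT", ...
        std::vector<std::pair<std::string, Type>> children;
    };

    // Mirror of the patched LogicalType::MAP: accept any two-field struct,
    // but rewrite the field names before embedding it in the map type.
    Type MakeMapType(Type entry) {
        assert(entry.id == "STRUCT" && entry.children.size() == 2);
        entry.children[0].first = "key";
        entry.children[1].first = "value";
        return Type{"MAP", {{"", entry}}};
    }

    int main() {
        Type entry{"STRUCT", {{"k", Type{"INT", {}}}, {"v", Type{"VARCHAR", {}}}}};
        Type map = MakeMapType(entry);
        // Mirror of the new VerifyMap assertions:
        const Type &child = map.children[0].second;
        assert(child.children.size() == 2);
        assert(child.children[0].first == "key");
        assert(child.children[1].first == "value");
        return 0;
    }
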
@@ -142,6 +142,7 @@ static void HistogramFinalizeFunction(Vector &state_vector, AggregateInputData &
  list_struct_data[rid].offset = old_len;
  old_len += list_struct_data[rid].length;
  }
+ result.Verify(count);
  }
 
  unique_ptr<FunctionData> HistogramBindFunction(ClientContext &context, AggregateFunction &function,
@@ -42,8 +42,8 @@ static unique_ptr<FunctionData> MapEntriesBind(ClientContext &context, ScalarFun
  auto &key_type = MapType::KeyType(map);
  auto &value_type = MapType::ValueType(map);
 
- child_types.push_back(make_pair("k", key_type));
- child_types.push_back(make_pair("v", value_type));
+ child_types.push_back(make_pair("key", key_type));
+ child_types.push_back(make_pair("value", value_type));
 
  auto row_type = LogicalType::STRUCT(child_types);
 
@@ -308,40 +308,37 @@ OperatorResultType PhysicalUnnest::ExecuteInternal(ExecutionContext &context, Da
  // UNNEST(NULL)
  chunk.SetCardinality(0);
  break;
+ }
 
- } else {
-
- auto &vector_data = state.list_vector_data[col_idx];
- auto current_idx = vector_data.sel->get_index(state.current_row);
-
- if (!vector_data.validity.RowIsValid(current_idx)) {
- UnnestNull(0, this_chunk_len, result_vector);
+ auto &vector_data = state.list_vector_data[col_idx];
+ auto current_idx = vector_data.sel->get_index(state.current_row);
 
- } else {
+ if (!vector_data.validity.RowIsValid(current_idx)) {
+ UnnestNull(0, this_chunk_len, result_vector);
+ continue;
+ }
 
- auto list_data = UnifiedVectorFormat::GetData<list_entry_t>(vector_data);
- auto list_entry = list_data[current_idx];
+ auto list_data = UnifiedVectorFormat::GetData<list_entry_t>(vector_data);
+ auto list_entry = list_data[current_idx];
 
- idx_t list_count = 0;
- if (state.list_position < list_entry.length) {
- // there are still list_count elements to unnest
- list_count = MinValue<idx_t>(this_chunk_len, list_entry.length - state.list_position);
+ idx_t list_count = 0;
+ if (state.list_position < list_entry.length) {
+ // there are still list_count elements to unnest
+ list_count = MinValue<idx_t>(this_chunk_len, list_entry.length - state.list_position);
 
- auto &list_vector = state.list_data.data[col_idx];
- auto &child_vector = ListVector::GetEntry(list_vector);
- auto list_size = ListVector::GetListSize(list_vector);
- auto &child_vector_data = state.list_child_data[col_idx];
+ auto &list_vector = state.list_data.data[col_idx];
+ auto &child_vector = ListVector::GetEntry(list_vector);
+ auto list_size = ListVector::GetListSize(list_vector);
+ auto &child_vector_data = state.list_child_data[col_idx];
 
- auto base_offset = list_entry.offset + state.list_position;
- UnnestVector(child_vector_data, child_vector, list_size, base_offset, base_offset + list_count,
- result_vector);
- }
+ auto base_offset = list_entry.offset + state.list_position;
+ UnnestVector(child_vector_data, child_vector, list_size, base_offset, base_offset + list_count,
+ result_vector);
+ }
 
- // fill the rest with NULLs
- if (list_count != this_chunk_len) {
- UnnestNull(list_count, this_chunk_len, result_vector);
- }
- }
+ // fill the rest with NULLs
+ if (list_count != this_chunk_len) {
+ UnnestNull(list_count, this_chunk_len, result_vector);
  }
  }
 
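The PhysicalUnnest hunk above is behavior-preserving: the nested else branches are flattened into early-continue guard clauses so the main unnest path sits at one indentation level. A trivial standalone sketch of the same guard-clause shape, with a null list standing in for an invalid row:

    #include <iostream>
    #include <vector>

    int main() {
        std::vector<int> a{1, 2};
        std::vector<const std::vector<int> *> lists = {nullptr, &a};
        for (const auto *list : lists) {
            if (!list) {               // guard clause, like the validity check
                std::cout << "NULL\n"; // handle the null row, then move on
                continue;
            }
            for (int v : *list) {      // the main logic stays un-nested
                std::cout << v << ' ';
            }
            std::cout << '\n';
        }
        return 0;
    }
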
@@ -1,8 +1,8 @@
  #ifndef DUCKDB_VERSION
- #define DUCKDB_VERSION "0.8.2-dev3204"
+ #define DUCKDB_VERSION "0.8.2-dev3250"
  #endif
  #ifndef DUCKDB_SOURCE_ID
- #define DUCKDB_SOURCE_ID "ade3443f4a"
+ #define DUCKDB_SOURCE_ID "a7f85b3b2c"
  #endif
  #include "duckdb/function/table/system_functions.hpp"
  #include "duckdb/main/database.hpp"
@@ -68,7 +68,7 @@ struct DebugForceNoCrossProduct {
  struct OrderedAggregateThreshold {
  static constexpr const char *Name = "ordered_aggregate_threshold"; // NOLINT
  static constexpr const char *Description = // NOLINT
- "the number of rows to accumulate before sorting, used for tuning";
+ "The number of rows to accumulate before sorting, used for tuning";
  static constexpr const LogicalTypeId InputType = LogicalTypeId::UBIGINT; // NOLINT
  static void SetLocal(ClientContext &context, const Value &parameter);
  static void ResetLocal(ClientContext &context);
@@ -225,6 +225,7 @@ struct EnableProgressBarSetting {
  static void ResetLocal(ClientContext &context);
  static Value GetSetting(ClientContext &context);
  };
+
  struct EnableProgressBarPrintSetting {
  static constexpr const char *Name = "enable_progress_bar_print";
  static constexpr const char *Description =
@@ -235,15 +236,6 @@ struct EnableProgressBarPrintSetting {
  static Value GetSetting(ClientContext &context);
  };
 
- struct ExperimentalParallelCSVSetting {
- static constexpr const char *Name = "experimental_parallel_csv";
- static constexpr const char *Description = "Whether or not to use the experimental parallel CSV reader";
- static constexpr const LogicalTypeId InputType = LogicalTypeId::BOOLEAN;
- static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &parameter);
- static void ResetGlobal(DatabaseInstance *db, DBConfig &config);
- static Value GetSetting(ClientContext &context);
- };
-
  struct ExplainOutputSetting {
  static constexpr const char *Name = "explain_output";
  static constexpr const char *Description = "Output of EXPLAIN statements (ALL, OPTIMIZED_ONLY, PHYSICAL_ONLY)";
@@ -166,9 +166,8 @@ void EmptyStreamRelease(ArrowArrayStream *) {
  }
 
  void FactoryGetSchema(uintptr_t stream_factory_ptr, duckdb::ArrowSchemaWrapper &schema) {
- auto private_data =
- reinterpret_cast<PrivateData *>(reinterpret_cast<ArrowArrayStream *>(stream_factory_ptr)->private_data);
- schema.arrow_schema = *private_data->schema;
+ auto stream = reinterpret_cast<ArrowArrayStream *>(stream_factory_ptr);
+ stream->get_schema(stream, &schema.arrow_schema);
 
  // Need to nullify the root schema's release function here, because streams don't allow us to set the release
  // function. For the schema's children, we nullify the release functions in `duckdb_arrow_scan`, so we don't need to
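
The FactoryGetSchema change stops reaching into the stream's private_data, which assumed a particular producer layout, and instead asks the stream for its schema through the get_schema callback that the Arrow C stream interface defines. A hedged sketch of that convention follows, with abridged struct declarations and a dummy producer; the real ABI also carries get_next, get_last_error, and release members plus richer schema fields:

    #include <cstdio>

    // Abridged view of the Arrow C data interface structs; the real ones
    // have more members (children, flags, release callbacks, ...).
    struct ArrowSchema {
        const char *format;
        void *private_data;
    };

    struct ArrowArrayStream {
        // The portable way to get the schema: call through the stream itself.
        int (*get_schema)(ArrowArrayStream *, ArrowSchema *out);
        void *private_data; // producer-specific; consumers should not assume its layout
    };

    // Dummy producer whose private_data happens to hold a schema.
    static ArrowSchema g_schema = {"i", nullptr}; // "i" is the int32 format string
    static int GetSchema(ArrowArrayStream *, ArrowSchema *out) {
        *out = g_schema;
        return 0;
    }

    int main() {
        ArrowArrayStream stream = {GetSchema, &g_schema};
        ArrowSchema schema;
        // Like the patched FactoryGetSchema: use the callback, not private_data.
        stream.get_schema(&stream, &schema);
        std::printf("format: %s\n", schema.format);
        return 0;
    }
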
@@ -75,7 +75,6 @@ static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting
  DUCKDB_LOCAL(EnableProfilingSetting),
  DUCKDB_LOCAL(EnableProgressBarSetting),
  DUCKDB_LOCAL(EnableProgressBarPrintSetting),
- DUCKDB_GLOBAL(ExperimentalParallelCSVSetting),
  DUCKDB_LOCAL(ExplainOutputSetting),
  DUCKDB_GLOBAL(ExtensionDirectorySetting),
  DUCKDB_GLOBAL(ExternalThreadsSetting),
@@ -564,21 +564,6 @@ Value EnableProgressBarPrintSetting::GetSetting(ClientContext &context) {
  return Value::BOOLEAN(ClientConfig::GetConfig(context).print_progress_bar);
  }
 
- //===--------------------------------------------------------------------===//
- // Experimental Parallel CSV
- //===--------------------------------------------------------------------===//
- void ExperimentalParallelCSVSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) {
- Printer::Print("experimental_parallel_csv is deprecated and will be removed with the next release - the parallel "
- "CSV reader is now standard and does not need to be manually enabled anymore 1");
- }
-
- void ExperimentalParallelCSVSetting::ResetGlobal(DatabaseInstance *db, DBConfig &config) {
- }
-
- Value ExperimentalParallelCSVSetting::GetSetting(ClientContext &context) {
- return Value();
- }
-
  //===--------------------------------------------------------------------===//
  // Explain Output
  //===--------------------------------------------------------------------===//
@@ -461,7 +461,7 @@ void FSSTStorage::Compress(CompressionState &state_p, Vector &scan_vector, idx_t
  &sizes_in[0], /* IN: byte-lengths of the inputs */
  &strings_in[0], /* IN: input string start pointers. */
  compress_buffer_size, /* IN: byte-length of output buffer. */
- &compress_buffer[0], /* OUT: memorxy buffer to put the compressed strings in (one after the other). */
+ &compress_buffer[0], /* OUT: memory buffer to put the compressed strings in (one after the other). */
  &sizes_out[0], /* OUT: byte-lengths of the compressed strings. */
  &strings_out[0] /* OUT: output string start pointers. Will all point into [output,output+size). */
  );
@@ -1073,20 +1073,19 @@ void DataTable::VerifyUpdateConstraints(ClientContext &context, TableCatalogEntr
  const vector<PhysicalIndex> &column_ids) {
  auto &constraints = table.GetConstraints();
  auto &bound_constraints = table.GetBoundConstraints();
- for (idx_t i = 0; i < bound_constraints.size(); i++) {
- auto &base_constraint = constraints[i];
- auto &constraint = bound_constraints[i];
+ for (idx_t constr_idx = 0; constr_idx < bound_constraints.size(); constr_idx++) {
+ auto &base_constraint = constraints[constr_idx];
+ auto &constraint = bound_constraints[constr_idx];
  switch (constraint->type) {
  case ConstraintType::NOT_NULL: {
  auto &bound_not_null = *reinterpret_cast<BoundNotNullConstraint *>(constraint.get());
  auto &not_null = *reinterpret_cast<NotNullConstraint *>(base_constraint.get());
  // check if the constraint is in the list of column_ids
- // FIXME: double usage of 'i'?
- for (idx_t i = 0; i < column_ids.size(); i++) {
- if (column_ids[i] == bound_not_null.index) {
+ for (idx_t col_idx = 0; col_idx < column_ids.size(); col_idx++) {
+ if (column_ids[col_idx] == bound_not_null.index) {
  // found the column id: check the data in
  auto &col = table.GetColumn(LogicalIndex(not_null.index));
- VerifyNotNullConstraint(table, chunk.data[i], chunk.size(), col.Name());
+ VerifyNotNullConstraint(table, chunk.data[col_idx], chunk.size(), col.Name());
  break;
  }
  }
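
The renames in this hunk resolve the removed FIXME: the inner loop reused i, shadowing the outer constraint counter, so chunk.data[i] was easy to misread even though the inner index was the intended one. A small standalone reminder of how such shadowing behaves:

    #include <iostream>

    int main() {
        for (int i = 0; i < 2; i++) {
            // Legal C++, but this inner i shadows the outer one; every use
            // of i inside this block refers to the inner counter only.
            for (int i = 0; i < 3; i++) {
                std::cout << i << ' '; // prints 0 1 2, twice
            }
        }
        std::cout << '\n';
        return 0;
    }
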
@@ -1122,10 +1121,10 @@ void DataTable::VerifyUpdateConstraints(ClientContext &context, TableCatalogEntr
  void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector &row_ids,
  const vector<PhysicalIndex> &column_ids, DataChunk &updates) {
  D_ASSERT(row_ids.GetType().InternalType() == ROW_TYPE);
-
  D_ASSERT(column_ids.size() == updates.ColumnCount());
- auto count = updates.size();
  updates.Verify();
+
+ auto count = updates.size();
  if (count == 0) {
  return;
  }
@@ -1138,24 +1137,36 @@ void DataTable::Update(TableCatalogEntry &table, ClientContext &context, Vector
  VerifyUpdateConstraints(context, table, updates, column_ids);
 
  // now perform the actual update
- auto &transaction = DuckTransaction::Get(context, db);
+ Vector max_row_id_vec(Value::BIGINT(MAX_ROW_ID));
+ Vector row_ids_slice(LogicalType::BIGINT);
+ DataChunk updates_slice;
+ updates_slice.InitializeEmpty(updates.GetTypes());
 
- updates.Flatten();
- row_ids.Flatten(count);
- auto ids = FlatVector::GetData<row_t>(row_ids);
- auto first_id = FlatVector::GetValue<row_t>(row_ids, 0);
- if (first_id >= MAX_ROW_ID) {
- // update is in transaction-local storage: push update into local storage
- auto &local_storage = LocalStorage::Get(context, db);
- local_storage.Update(*this, row_ids, column_ids, updates);
- return;
+ SelectionVector sel_local_update(count), sel_global_update(count);
+ auto n_local_update = VectorOperations::GreaterThanEquals(row_ids, max_row_id_vec, nullptr, count,
+ &sel_local_update, &sel_global_update);
+ auto n_global_update = count - n_local_update;
+
+ // row id > MAX_ROW_ID? transaction-local storage
+ if (n_local_update > 0) {
+ updates_slice.Slice(updates, sel_local_update, n_local_update);
+ updates_slice.Flatten();
+ row_ids_slice.Slice(row_ids, sel_local_update, n_local_update);
+ row_ids_slice.Flatten(n_local_update);
+
+ LocalStorage::Get(context, db).Update(*this, row_ids_slice, column_ids, updates_slice);
  }
 
- // update is in the row groups
- // we need to figure out for each id to which row group it belongs
- // usually all (or many) ids belong to the same row group
- // we iterate over the ids and check for every id if it belongs to the same row group as their predecessor
- row_groups->Update(transaction, ids, column_ids, updates);
+ // otherwise global storage
+ if (n_global_update > 0) {
+ updates_slice.Slice(updates, sel_global_update, n_global_update);
+ updates_slice.Flatten();
+ row_ids_slice.Slice(row_ids, sel_global_update, n_global_update);
+ row_ids_slice.Flatten(n_global_update);
+
+ row_groups->Update(DuckTransaction::Get(context, db), FlatVector::GetData<row_t>(row_ids_slice), column_ids,
+ updates_slice);
+ }
  }
 
  void DataTable::UpdateColumn(TableCatalogEntry &table, ClientContext &context, Vector &row_ids,
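
This DataTable::Update rewrite replaces the old all-or-nothing routing, where the first row id decided whether the whole batch went to transaction-local storage, with a per-row partition: one comparison pass fills two selection vectors, and each side is sliced and routed separately. A standalone sketch of that partition step, with an illustrative threshold standing in for DuckDB's MAX_ROW_ID:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Illustrative threshold mirroring MAX_ROW_ID: ids at or above it live in
    // transaction-local storage, ids below it in the global row groups.
    constexpr int64_t kMaxRowId = 1'000'000;

    int main() {
        std::vector<int64_t> row_ids = {42, 1'000'007, 99, 1'000'001};

        // Stand-in for VectorOperations::GreaterThanEquals with two selection
        // vectors: one pass builds index lists for both sides of the predicate.
        std::vector<size_t> sel_local, sel_global;
        for (size_t i = 0; i < row_ids.size(); i++) {
            (row_ids[i] >= kMaxRowId ? sel_local : sel_global).push_back(i);
        }

        // Each side is then sliced and routed to its own storage path, instead
        // of letting the first row id decide for the whole batch.
        std::cout << "local updates: " << sel_local.size()
                  << ", global updates: " << sel_global.size() << "\n";
        return 0;
    }
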