duckdb 0.5.1-dev7.0 → 0.5.1-dev72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
- "version": "0.5.1-dev7.0",
4
+ "version": "0.5.1-dev72.0",
5
5
  "description": "DuckDB node.js API",
6
6
  "gypfile": true,
7
7
  "dependencies": {
package/src/duckdb.cpp CHANGED
@@ -620,7 +620,88 @@ public:
620
620
 
621
621
  } // namespace duckdb
622
622
 
623
+ //===----------------------------------------------------------------------===//
624
+ // DuckDB
625
+ //
626
+ // extension_functions.hpp
627
+ //
628
+ //
629
+ //===----------------------------------------------------------------------===//
630
+
631
+
632
+
633
+
623
634
 
635
+ namespace duckdb {
636
+
637
+ struct ExtensionFunction {
638
+ char function[48];
639
+ char extension[48];
640
+ };
641
+
642
+ static constexpr ExtensionFunction EXTENSION_FUNCTIONS[] = {
643
+ {"->>", "json"},
644
+ {"array_to_json", "json"},
645
+ {"create_fts_index", "fts"},
646
+ {"dbgen", "tpch"},
647
+ {"drop_fts_index", "fts"},
648
+ {"dsdgen", "tpcds"},
649
+ {"excel_text", "excel"},
650
+ {"from_json", "json"},
651
+ {"from_json_strict", "json"},
652
+ {"from_substrait", "substrait"},
653
+ {"get_substrait", "substrait"},
654
+ {"get_substrait_json", "substrait"},
655
+ {"icu_calendar_names", "icu"},
656
+ {"icu_sort_key", "icu"},
657
+ {"json", "json"},
658
+ {"json_array", "json"},
659
+ {"json_array_length", "json"},
660
+ {"json_extract", "json"},
661
+ {"json_extract_path", "json"},
662
+ {"json_extract_path_text", "json"},
663
+ {"json_extract_string", "json"},
664
+ {"json_group_array", "json"},
665
+ {"json_group_object", "json"},
666
+ {"json_group_structure", "json"},
667
+ {"json_merge_patch", "json"},
668
+ {"json_object", "json"},
669
+ {"json_quote", "json"},
670
+ {"json_structure", "json"},
671
+ {"json_transform", "json"},
672
+ {"json_transform_strict", "json"},
673
+ {"json_type", "json"},
674
+ {"json_valid", "json"},
675
+ {"make_timestamptz", "icu"},
676
+ {"parquet_metadata", "parquet"},
677
+ {"parquet_scan", "parquet"},
678
+ {"parquet_schema", "parquet"},
679
+ {"pg_timezone_names", "icu"},
680
+ {"postgres_attach", "postgres_scanner"},
681
+ {"postgres_scan", "postgres_scanner"},
682
+ {"postgres_scan_pushdown", "postgres_scanner"},
683
+ {"read_json_objects", "json"},
684
+ {"read_ndjson_objects", "json"},
685
+ {"read_parquet", "parquet"},
686
+ {"row_to_json", "json"},
687
+ {"sqlite_attach", "sqlite_scanner"},
688
+ {"sqlite_scan", "sqlite_scanner"},
689
+ {"stem", "fts"},
690
+ {"text", "excel"},
691
+ {"to_json", "json"},
692
+ {"tpcds", "tpcds"},
693
+ {"tpcds_answers", "tpcds"},
694
+ {"tpcds_queries", "tpcds"},
695
+ {"tpch", "tpch"},
696
+ {"tpch_answers", "tpch"},
697
+ {"tpch_queries", "tpch"},
698
+ {"visualize_diff_profiling_output", "visualizer"},
699
+ {"visualize_json_profiling_output", "visualizer"},
700
+ {"visualize_last_profiling_output", "visualizer"},
701
+ };
702
+ } // namespace duckdb
703
+
704
+ #include <algorithm>
624
705
  namespace duckdb {
625
706
 
626
707
  string SimilarCatalogEntry::GetQualifiedName() const {
@@ -823,6 +904,16 @@ SimilarCatalogEntry Catalog::SimilarEntryInSchemas(ClientContext &context, const
823
904
  return {most_similar.first, most_similar.second, schema_of_most_similar};
824
905
  }
825
906
 
907
+ string FindExtension(const string &function_name) {
908
+ auto size = sizeof(EXTENSION_FUNCTIONS) / sizeof(ExtensionFunction);
909
+ auto it = std::lower_bound(
910
+ EXTENSION_FUNCTIONS, EXTENSION_FUNCTIONS + size, function_name,
911
+ [](const ExtensionFunction &element, const string &value) { return element.function < value; });
912
+ if (it != EXTENSION_FUNCTIONS + size && it->function == function_name) {
913
+ return it->extension;
914
+ }
915
+ return "";
916
+ }
826
917
  CatalogException Catalog::CreateMissingEntryException(ClientContext &context, const string &entry_name,
827
918
  CatalogType type, const vector<SchemaCatalogEntry *> &schemas,
828
919
  QueryErrorContext error_context) {
@@ -836,7 +927,12 @@ CatalogException Catalog::CreateMissingEntryException(ClientContext &context, co
836
927
  }
837
928
  });
838
929
  auto unseen_entry = SimilarEntryInSchemas(context, entry_name, type, unseen_schemas);
839
-
930
+ auto extension_name = FindExtension(entry_name);
931
+ if (!extension_name.empty()) {
932
+ return CatalogException("Function with name %s is not on the catalog, but it exists in the %s extension. To "
933
+ "Install and Load the extension, run: INSTALL %s; LOAD %s;",
934
+ entry_name, extension_name, extension_name, extension_name);
935
+ }
840
936
  string did_you_mean;
841
937
  if (unseen_entry.Found() && unseen_entry.distance < entry.distance) {
842
938
  did_you_mean = "\nDid you mean \"" + unseen_entry.GetQualifiedName() + "\"?";
@@ -33336,6 +33432,9 @@ void RowOperations::UnswizzleHeapPointer(const RowLayout &layout, const data_ptr
33336
33432
 
33337
33433
  static inline void VerifyUnswizzledString(const RowLayout &layout, const idx_t &col_idx, const data_ptr_t &row_ptr) {
33338
33434
  #ifdef DEBUG
33435
+ if (layout.GetTypes()[col_idx] == LogicalTypeId::BLOB) {
33436
+ return;
33437
+ }
33339
33438
  idx_t entry_idx;
33340
33439
  idx_t idx_in_entry;
33341
33440
  ValidityBytes::GetEntryIndex(col_idx, entry_idx, idx_in_entry);
@@ -35673,7 +35772,10 @@ struct SortConstants {
35673
35772
 
35674
35773
  struct SortLayout {
35675
35774
  public:
35775
+ SortLayout() {
35776
+ }
35676
35777
  explicit SortLayout(const vector<BoundOrderByNode> &orders);
35778
+ SortLayout GetPrefixComparisonLayout(idx_t num_prefix_cols) const;
35677
35779
 
35678
35780
  public:
35679
35781
  idx_t column_count;
@@ -37324,6 +37426,32 @@ SortLayout::SortLayout(const vector<BoundOrderByNode> &orders)
37324
37426
  blob_layout.Initialize(blob_layout_types);
37325
37427
  }
37326
37428
 
37429
+ SortLayout SortLayout::GetPrefixComparisonLayout(idx_t num_prefix_cols) const {
37430
+ SortLayout result;
37431
+ result.column_count = num_prefix_cols;
37432
+ result.all_constant = true;
37433
+ result.comparison_size = 0;
37434
+ for (idx_t col_idx = 0; col_idx < num_prefix_cols; col_idx++) {
37435
+ result.order_types.push_back(order_types[col_idx]);
37436
+ result.order_by_null_types.push_back(order_by_null_types[col_idx]);
37437
+ result.logical_types.push_back(logical_types[col_idx]);
37438
+
37439
+ result.all_constant = result.all_constant && constant_size[col_idx];
37440
+ result.constant_size.push_back(constant_size[col_idx]);
37441
+
37442
+ result.comparison_size += column_sizes[col_idx];
37443
+ result.column_sizes.push_back(column_sizes[col_idx]);
37444
+
37445
+ result.prefix_lengths.push_back(prefix_lengths[col_idx]);
37446
+ result.stats.push_back(stats[col_idx]);
37447
+ result.has_null.push_back(has_null[col_idx]);
37448
+ }
37449
+ result.entry_size = entry_size;
37450
+ result.blob_layout = blob_layout;
37451
+ result.sorting_to_blob_col = sorting_to_blob_col;
37452
+ return result;
37453
+ }
37454
+
37327
37455
  LocalSortState::LocalSortState() : initialized(false) {
37328
37456
  }
37329
37457
 
@@ -64927,12 +65055,14 @@ public:
64927
65055
 
64928
65056
  WindowGlobalHashGroup(BufferManager &buffer_manager, const Orders &partitions, const Orders &orders,
64929
65057
  const Types &payload_types, idx_t max_mem, bool external)
64930
- : memory_per_thread(max_mem), count(0), partition_layout(partitions) {
65058
+ : memory_per_thread(max_mem), count(0) {
64931
65059
 
64932
65060
  RowLayout payload_layout;
64933
65061
  payload_layout.Initialize(payload_types);
64934
65062
  global_sort = make_unique<GlobalSortState>(buffer_manager, orders, payload_layout);
64935
65063
  global_sort->external = external;
65064
+
65065
+ partition_layout = global_sort->sort_layout.GetPrefixComparisonLayout(partitions.size());
64936
65066
  }
64937
65067
 
64938
65068
  void Combine(LocalSortState &local_sort) {
@@ -96859,7 +96989,8 @@ struct DateDiff {
96859
96989
  struct WeekOperator {
96860
96990
  template <class TA, class TB, class TR>
96861
96991
  static inline TR Operation(TA startdate, TB enddate) {
96862
- return Date::Epoch(enddate) / Interval::SECS_PER_WEEK - Date::Epoch(startdate) / Interval::SECS_PER_WEEK;
96992
+ return Date::Epoch(Date::GetMondayOfCurrentWeek(enddate)) / Interval::SECS_PER_WEEK -
96993
+ Date::Epoch(Date::GetMondayOfCurrentWeek(startdate)) / Interval::SECS_PER_WEEK;
96863
96994
  }
96864
96995
  };
96865
96996
 
@@ -104318,18 +104449,12 @@ void SinkDataChunk(Vector *child_vector, SelectionVector &sel, idx_t offset_list
104318
104449
  static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &result) {
104319
104450
  D_ASSERT(args.ColumnCount() >= 1 && args.ColumnCount() <= 3);
104320
104451
  auto count = args.size();
104321
- Vector &lists = args.data[0];
104452
+ Vector &input_lists = args.data[0];
104322
104453
 
104323
104454
  result.SetVectorType(VectorType::FLAT_VECTOR);
104324
104455
  auto &result_validity = FlatVector::Validity(result);
104325
104456
 
104326
- for (auto &v : args.data) {
104327
- if (v.GetVectorType() != VectorType::FLAT_VECTOR && v.GetVectorType() != VectorType::CONSTANT_VECTOR) {
104328
- v.Flatten(count);
104329
- }
104330
- }
104331
-
104332
- if (lists.GetType().id() == LogicalTypeId::SQLNULL) {
104457
+ if (input_lists.GetType().id() == LogicalTypeId::SQLNULL) {
104333
104458
  result_validity.SetInvalid(0);
104334
104459
  return;
104335
104460
  }
@@ -104344,15 +104469,18 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104344
104469
  LocalSortState local_sort_state;
104345
104470
  local_sort_state.Initialize(global_sort_state, buffer_manager);
104346
104471
 
104472
+ // this ensures that we do not change the order of the entries in the input chunk
104473
+ VectorOperations::Copy(input_lists, result, count, 0, 0);
104474
+
104347
104475
  // get the child vector
104348
- auto lists_size = ListVector::GetListSize(lists);
104349
- auto &child_vector = ListVector::GetEntry(lists);
104476
+ auto lists_size = ListVector::GetListSize(result);
104477
+ auto &child_vector = ListVector::GetEntry(result);
104350
104478
  UnifiedVectorFormat child_data;
104351
104479
  child_vector.ToUnifiedFormat(lists_size, child_data);
104352
104480
 
104353
104481
  // get the lists data
104354
104482
  UnifiedVectorFormat lists_data;
104355
- lists.ToUnifiedFormat(count, lists_data);
104483
+ result.ToUnifiedFormat(count, lists_data);
104356
104484
  auto list_entries = (list_entry_t *)lists_data.data;
104357
104485
 
104358
104486
  // create the lists_indices vector, this contains an element for each list's entry,
@@ -104449,8 +104577,6 @@ static void ListSortFunction(DataChunk &args, ExpressionState &state, Vector &re
104449
104577
  child_vector.Flatten(sel_sorted_idx);
104450
104578
  }
104451
104579
 
104452
- result.Reference(lists);
104453
-
104454
104580
  if (args.AllConstant()) {
104455
104581
  result.SetVectorType(VectorType::CONSTANT_VECTOR);
104456
104582
  }
@@ -141402,7 +141528,7 @@ Value ForceCompressionSetting::GetSetting(ClientContext &context) {
141402
141528
  //===--------------------------------------------------------------------===//
141403
141529
  void HomeDirectorySetting::SetLocal(ClientContext &context, const Value &input) {
141404
141530
  auto &config = ClientConfig::GetConfig(context);
141405
- config.home_directory = input.IsNull() ? input.ToString() : string();
141531
+ config.home_directory = input.IsNull() ? string() : input.ToString();
141406
141532
  }
141407
141533
 
141408
141534
  Value HomeDirectorySetting::GetSetting(ClientContext &context) {
@@ -151925,6 +152051,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalGet
151925
152051
 
151926
152052
 
151927
152053
 
152054
+
151928
152055
  namespace duckdb {
151929
152056
 
151930
152057
  void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, unique_ptr<LogicalOperator> *node_ptr) {
@@ -151954,10 +152081,15 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
151954
152081
  // semi or inner join on false; entire node can be pruned
151955
152082
  ReplaceWithEmptyResult(*node_ptr);
151956
152083
  return;
151957
- case JoinType::ANTI:
151958
- // anti join: replace entire join with LHS
151959
- *node_ptr = move(join.children[0]);
152084
+ case JoinType::ANTI: {
152085
+ // when the right child has data, return the left child
152086
+ // when the right child has no data, return an empty set
152087
+ auto limit = make_unique<LogicalLimit>(1, 0, nullptr, nullptr);
152088
+ limit->AddChild(move(join.children[1]));
152089
+ auto cross_product = LogicalCrossProduct::Create(move(join.children[0]), move(limit));
152090
+ *node_ptr = move(cross_product);
151960
152091
  return;
152092
+ }
151961
152093
  case JoinType::LEFT:
151962
152094
  // anti/left outer join: replace right side with empty node
151963
152095
  ReplaceWithEmptyResult(join.children[1]);
@@ -151985,10 +152117,15 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
151985
152117
  } else {
151986
152118
  // this is the only condition and it is always true: all conditions are true
151987
152119
  switch (join.join_type) {
151988
- case JoinType::SEMI:
151989
- // semi join on true: replace entire join with LHS
151990
- *node_ptr = move(join.children[0]);
152120
+ case JoinType::SEMI: {
152121
+ // when the right child has data, return the left child
152122
+ // when the right child has no data, return an empty set
152123
+ auto limit = make_unique<LogicalLimit>(1, 0, nullptr, nullptr);
152124
+ limit->AddChild(move(join.children[1]));
152125
+ auto cross_product = LogicalCrossProduct::Create(move(join.children[0]), move(limit));
152126
+ *node_ptr = move(cross_product);
151991
152127
  return;
152128
+ }
151992
152129
  case JoinType::INNER:
151993
152130
  case JoinType::LEFT:
151994
152131
  case JoinType::RIGHT:
@@ -152105,6 +152242,7 @@ unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalJoin
152105
152242
  // then propagate into the join conditions
152106
152243
  switch (join.type) {
152107
152244
  case LogicalOperatorType::LOGICAL_COMPARISON_JOIN:
152245
+ case LogicalOperatorType::LOGICAL_DELIM_JOIN:
152108
152246
  PropagateStatistics((LogicalComparisonJoin &)join, node_ptr);
152109
152247
  break;
152110
152248
  case LogicalOperatorType::LOGICAL_ANY_JOIN:
@@ -171765,7 +171903,7 @@ LogicalType Transformer::TransformTypeName(duckdb_libpgquery::PGTypeName *type_n
171765
171903
 
171766
171904
  result_type = LogicalType::MAP(move(children));
171767
171905
  } else {
171768
- int8_t width, scale;
171906
+ int64_t width, scale;
171769
171907
  if (base_type == LogicalTypeId::DECIMAL) {
171770
171908
  // default decimal width/scale
171771
171909
  width = 18;
@@ -180863,6 +181001,9 @@ BoundStatement Binder::BindSummarize(ShowStatement &stmt) {
180863
181001
 
180864
181002
 
180865
181003
 
181004
+
181005
+
181006
+
180866
181007
  //===----------------------------------------------------------------------===//
180867
181008
  // DuckDB
180868
181009
  //
@@ -180897,10 +181038,6 @@ protected:
180897
181038
 
180898
181039
 
180899
181040
 
180900
-
180901
-
180902
-
180903
-
180904
181041
  //===----------------------------------------------------------------------===//
180905
181042
  // DuckDB
180906
181043
  //
@@ -180933,6 +181070,8 @@ public:
180933
181070
  };
180934
181071
  } // namespace duckdb
180935
181072
 
181073
+
181074
+
180936
181075
  #include <algorithm>
180937
181076
 
180938
181077
  namespace duckdb {
@@ -181103,10 +181242,10 @@ BoundStatement Binder::Bind(UpdateStatement &stmt) {
181103
181242
  if (column.Generated()) {
181104
181243
  throw BinderException("Cant update column \"%s\" because it is a generated column!", column.Name());
181105
181244
  }
181106
- if (std::find(update->columns.begin(), update->columns.end(), column.Oid()) != update->columns.end()) {
181245
+ if (std::find(update->columns.begin(), update->columns.end(), column.StorageOid()) != update->columns.end()) {
181107
181246
  throw BinderException("Multiple assignments to same column \"%s\"", colname);
181108
181247
  }
181109
- update->columns.push_back(column.Oid());
181248
+ update->columns.push_back(column.StorageOid());
181110
181249
 
181111
181250
  if (expr->type == ExpressionType::VALUE_DEFAULT) {
181112
181251
  update->expressions.push_back(make_unique<BoundDefaultExpression>(column.Type()));
@@ -181188,7 +181327,20 @@ BoundStatement Binder::Bind(VacuumStatement &stmt) {
181188
181327
  auto &get = (LogicalGet &)*ref->get;
181189
181328
  columns.insert(columns.end(), get.names.begin(), get.names.end());
181190
181329
  }
181330
+
181331
+ case_insensitive_set_t column_name_set;
181332
+ vector<string> non_generated_column_names;
181191
181333
  for (auto &col_name : columns) {
181334
+ if (column_name_set.count(col_name) > 0) {
181335
+ throw BinderException("Vacuum the same column twice(same name in column name list)");
181336
+ }
181337
+ column_name_set.insert(col_name);
181338
+ auto &col = ref->table->GetColumn(col_name);
181339
+ // ignore generated column
181340
+ if (col.Generated()) {
181341
+ continue;
181342
+ }
181343
+ non_generated_column_names.push_back(col_name);
181192
181344
  ColumnRefExpression colref(col_name, ref->table->name);
181193
181345
  auto result = bind_context.BindColumn(colref, 0);
181194
181346
  if (result.HasError()) {
@@ -181196,17 +181348,29 @@ BoundStatement Binder::Bind(VacuumStatement &stmt) {
181196
181348
  }
181197
181349
  select_list.push_back(move(result.expression));
181198
181350
  }
181199
- auto table_scan = CreatePlan(*ref);
181200
- D_ASSERT(table_scan->type == LogicalOperatorType::LOGICAL_GET);
181201
- auto &get = (LogicalGet &)*table_scan;
181202
- for (idx_t i = 0; i < get.column_ids.size(); i++) {
181203
- stmt.info->column_id_map[i] = get.column_ids[i];
181204
- }
181351
+ stmt.info->columns = move(non_generated_column_names);
181352
+ if (!select_list.empty()) {
181353
+ auto table_scan = CreatePlan(*ref);
181354
+ D_ASSERT(table_scan->type == LogicalOperatorType::LOGICAL_GET);
181205
181355
 
181206
- auto projection = make_unique<LogicalProjection>(GenerateTableIndex(), move(select_list));
181207
- projection->children.push_back(move(table_scan));
181356
+ auto &get = (LogicalGet &)*table_scan;
181357
+
181358
+ D_ASSERT(select_list.size() == get.column_ids.size());
181359
+ D_ASSERT(stmt.info->columns.size() == get.column_ids.size());
181360
+ for (idx_t i = 0; i < get.column_ids.size(); i++) {
181361
+ stmt.info->column_id_map[i] = ref->table->columns[get.column_ids[i]].StorageOid();
181362
+ }
181363
+
181364
+ auto projection = make_unique<LogicalProjection>(GenerateTableIndex(), move(select_list));
181365
+ projection->children.push_back(move(table_scan));
181208
181366
 
181209
- root = move(projection);
181367
+ root = move(projection);
181368
+ } else {
181369
+ // eg. CREATE TABLE test (x AS (1));
181370
+ // ANALYZE test;
181371
+ // Make it not a SINK so it doesn't have to do anything
181372
+ stmt.info->has_table = false;
181373
+ }
181210
181374
  }
181211
181375
  auto vacuum = make_unique<LogicalSimple>(LogicalOperatorType::LOGICAL_VACUUM, move(stmt.info));
181212
181376
  if (root) {
package/src/duckdb.hpp CHANGED
@@ -11,8 +11,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
11
11
  #pragma once
12
12
  #define DUCKDB_AMALGAMATION 1
13
13
  #define DUCKDB_AMALGAMATION_EXTENDED 1
14
- #define DUCKDB_SOURCE_ID "9656abd17"
15
- #define DUCKDB_VERSION "v0.5.1-dev7"
14
+ #define DUCKDB_SOURCE_ID "62c4acd27"
15
+ #define DUCKDB_VERSION "v0.5.1-dev72"
16
16
  //===----------------------------------------------------------------------===//
17
17
  // DuckDB
18
18
  //
@@ -27469,7 +27469,8 @@ public:
27469
27469
  public:
27470
27470
  template <class T, class BASE>
27471
27471
  static string ToString(const T &entry) {
27472
- return entry.left->ToString() + " " + ExpressionTypeToOperator(entry.type) + " " + entry.right->ToString();
27472
+ return StringUtil::Format("(%s) %s (%s)", entry.left->ToString(), ExpressionTypeToOperator(entry.type),
27473
+ entry.right->ToString());
27473
27474
  }
27474
27475
  };
27475
27476
  } // namespace duckdb