duckdb 0.7.1-dev90.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/README.md +1 -1
  2. package/binding.gyp +7 -7
  3. package/package.json +3 -3
  4. package/src/duckdb/extension/json/buffered_json_reader.cpp +50 -9
  5. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +7 -2
  6. package/src/duckdb/extension/json/include/json_scan.hpp +45 -10
  7. package/src/duckdb/extension/json/json_functions/copy_json.cpp +35 -22
  8. package/src/duckdb/extension/json/json_functions/json_create.cpp +8 -8
  9. package/src/duckdb/extension/json/json_functions/json_structure.cpp +8 -3
  10. package/src/duckdb/extension/json/json_functions/json_transform.cpp +54 -10
  11. package/src/duckdb/extension/json/json_functions/read_json.cpp +104 -49
  12. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +5 -3
  13. package/src/duckdb/extension/json/json_functions.cpp +7 -0
  14. package/src/duckdb/extension/json/json_scan.cpp +144 -38
  15. package/src/duckdb/extension/parquet/column_reader.cpp +7 -0
  16. package/src/duckdb/extension/parquet/include/column_reader.hpp +1 -0
  17. package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -10
  18. package/src/duckdb/src/catalog/catalog.cpp +62 -13
  19. package/src/duckdb/src/catalog/catalog_entry/index_catalog_entry.cpp +8 -7
  20. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -1
  21. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  22. package/src/duckdb/src/catalog/default/default_functions.cpp +1 -0
  23. package/src/duckdb/src/catalog/default/default_views.cpp +1 -1
  24. package/src/duckdb/src/common/bind_helpers.cpp +55 -0
  25. package/src/duckdb/src/common/file_system.cpp +23 -9
  26. package/src/duckdb/src/common/hive_partitioning.cpp +1 -0
  27. package/src/duckdb/src/common/local_file_system.cpp +4 -4
  28. package/src/duckdb/src/common/string_util.cpp +8 -4
  29. package/src/duckdb/src/common/types/partitioned_column_data.cpp +1 -0
  30. package/src/duckdb/src/common/types.cpp +37 -11
  31. package/src/duckdb/src/execution/column_binding_resolver.cpp +5 -2
  32. package/src/duckdb/src/execution/index/art/art.cpp +117 -67
  33. package/src/duckdb/src/execution/index/art/art_key.cpp +24 -12
  34. package/src/duckdb/src/execution/index/art/leaf.cpp +7 -8
  35. package/src/duckdb/src/execution/index/art/node.cpp +13 -27
  36. package/src/duckdb/src/execution/index/art/node16.cpp +5 -8
  37. package/src/duckdb/src/execution/index/art/node256.cpp +3 -5
  38. package/src/duckdb/src/execution/index/art/node4.cpp +4 -7
  39. package/src/duckdb/src/execution/index/art/node48.cpp +5 -8
  40. package/src/duckdb/src/execution/index/art/prefix.cpp +2 -3
  41. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +6 -27
  42. package/src/duckdb/src/execution/operator/helper/physical_reset.cpp +1 -9
  43. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -9
  44. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +7 -9
  45. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +9 -0
  46. package/src/duckdb/src/execution/physical_operator.cpp +6 -6
  47. package/src/duckdb/src/function/pragma/pragma_queries.cpp +38 -11
  48. package/src/duckdb/src/function/scalar/generic/current_setting.cpp +2 -2
  49. package/src/duckdb/src/function/scalar/list/array_slice.cpp +2 -3
  50. package/src/duckdb/src/function/scalar/map/map.cpp +69 -21
  51. package/src/duckdb/src/function/scalar/string/like.cpp +6 -3
  52. package/src/duckdb/src/function/table/read_csv.cpp +16 -5
  53. package/src/duckdb/src/function/table/system/duckdb_temporary_files.cpp +59 -0
  54. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  55. package/src/duckdb/src/function/table/table_scan.cpp +3 -0
  56. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  57. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +7 -1
  58. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  59. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/index_catalog_entry.hpp +1 -1
  60. package/src/duckdb/src/include/duckdb/common/bind_helpers.hpp +2 -0
  61. package/src/duckdb/src/include/duckdb/common/enums/statement_type.hpp +1 -1
  62. package/src/duckdb/src/include/duckdb/common/enums/wal_type.hpp +3 -0
  63. package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +9 -1
  65. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +4 -4
  66. package/src/duckdb/src/include/duckdb/common/string_util.hpp +9 -2
  67. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +37 -41
  68. package/src/duckdb/src/include/duckdb/execution/index/art/art_key.hpp +8 -11
  69. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
  70. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +2 -1
  71. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  72. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -2
  73. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/main/{extension_functions.hpp → extension_entries.hpp} +27 -5
  75. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +11 -1
  76. package/src/duckdb/src/include/duckdb/main/settings.hpp +9 -0
  77. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +0 -7
  78. package/src/duckdb/src/include/duckdb/parser/query_node/select_node.hpp +1 -1
  79. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +2 -2
  80. package/src/duckdb/src/include/duckdb/parser/statement/copy_statement.hpp +1 -1
  81. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +3 -3
  82. package/src/duckdb/src/include/duckdb/parser/tableref/subqueryref.hpp +1 -1
  83. package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -0
  84. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +10 -3
  85. package/src/duckdb/src/include/duckdb/planner/operator/logical_execute.hpp +1 -5
  86. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +1 -2
  87. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +8 -0
  88. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +7 -1
  89. package/src/duckdb/src/include/duckdb/storage/index.hpp +47 -38
  90. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +7 -0
  91. package/src/duckdb/src/main/client_context.cpp +2 -0
  92. package/src/duckdb/src/main/config.cpp +1 -0
  93. package/src/duckdb/src/main/database.cpp +14 -5
  94. package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
  95. package/src/duckdb/src/main/extension/extension_helper.cpp +15 -0
  96. package/src/duckdb/src/main/extension/extension_install.cpp +60 -16
  97. package/src/duckdb/src/main/extension/extension_load.cpp +62 -13
  98. package/src/duckdb/src/main/settings/settings.cpp +16 -0
  99. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +2 -6
  100. package/src/duckdb/src/parallel/pipeline_executor.cpp +1 -55
  101. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +3 -0
  102. package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -13
  103. package/src/duckdb/src/parser/statement/delete_statement.cpp +3 -0
  104. package/src/duckdb/src/parser/statement/insert_statement.cpp +9 -0
  105. package/src/duckdb/src/parser/statement/update_statement.cpp +3 -0
  106. package/src/duckdb/src/parser/transform/expression/transform_case.cpp +3 -3
  107. package/src/duckdb/src/planner/bind_context.cpp +1 -1
  108. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +3 -0
  109. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +7 -14
  110. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +13 -0
  111. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +2 -2
  112. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +22 -1
  113. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +32 -1
  114. package/src/duckdb/src/planner/logical_operator.cpp +4 -1
  115. package/src/duckdb/src/storage/buffer_manager.cpp +105 -26
  116. package/src/duckdb/src/storage/compression/bitpacking.cpp +16 -7
  117. package/src/duckdb/src/storage/data_table.cpp +66 -3
  118. package/src/duckdb/src/storage/index.cpp +1 -1
  119. package/src/duckdb/src/storage/local_storage.cpp +1 -1
  120. package/src/duckdb/src/storage/table_index_list.cpp +1 -2
  121. package/src/duckdb/src/storage/wal_replay.cpp +68 -0
  122. package/src/duckdb/src/storage/write_ahead_log.cpp +21 -1
  123. package/src/duckdb/src/transaction/commit_state.cpp +5 -2
  124. package/src/duckdb/third_party/concurrentqueue/blockingconcurrentqueue.h +2 -2
  125. package/src/duckdb/third_party/fmt/include/fmt/core.h +1 -2
  126. package/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +4 -4
  127. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  128. package/src/statement.cpp +46 -12
  129. package/test/arrow.test.ts +3 -3
  130. package/test/prepare.test.ts +39 -1
  131. package/test/typescript_decls.test.ts +1 -1
@@ -66,46 +66,94 @@ void MapConversionVerify(Vector &vector, idx_t count) {
66
66
  }
67
67
  }
68
68
 
69
- static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result) {
70
- D_ASSERT(result.GetType().id() == LogicalTypeId::MAP);
71
-
72
- //! Otherwise if its not a constant vector, this breaks the optimizer
73
- result.SetVectorType(VectorType::CONSTANT_VECTOR);
74
- for (idx_t i = 0; i < args.ColumnCount(); i++) {
75
- if (args.data[i].GetVectorType() != VectorType::CONSTANT_VECTOR) {
76
- result.SetVectorType(VectorType::FLAT_VECTOR);
69
+ // Example:
70
+ // source: [1,2,3], expansion_factor: 4
71
+ // target (result): [1,2,3,1,2,3,1,2,3,1,2,3]
72
+ static void CreateExpandedVector(const Vector &source, Vector &target, idx_t expansion_factor) {
73
+ idx_t count = ListVector::GetListSize(source);
74
+ auto &entry = ListVector::GetEntry(source);
75
+
76
+ idx_t target_idx = 0;
77
+ for (idx_t copy = 0; copy < expansion_factor; copy++) {
78
+ for (idx_t key_idx = 0; key_idx < count; key_idx++) {
79
+ target.SetValue(target_idx, entry.GetValue(key_idx));
80
+ target_idx++;
77
81
  }
78
82
  }
83
+ D_ASSERT(target_idx == count * expansion_factor);
84
+ }
85
+
86
+ static void AlignVectorToReference(const Vector &original, const Vector &reference, idx_t tuple_count, Vector &result) {
87
+ auto original_length = ListVector::GetListSize(original);
88
+ auto new_length = ListVector::GetListSize(reference);
89
+
90
+ Vector expanded_const(ListType::GetChildType(original.GetType()), new_length);
91
+
92
+ auto expansion_factor = new_length / original_length;
93
+ if (expansion_factor != tuple_count) {
94
+ throw InvalidInputException("Error in MAP creation: key list and value list do not align. i.e. different "
95
+ "size or incompatible structure");
96
+ }
97
+ CreateExpandedVector(original, expanded_const, expansion_factor);
98
+ result.Reference(expanded_const);
99
+ }
100
+
101
+ static void MapFunction(DataChunk &args, ExpressionState &state, Vector &result) {
102
+ D_ASSERT(result.GetType().id() == LogicalTypeId::MAP);
79
103
 
80
104
  auto &key_vector = MapVector::GetKeys(result);
81
105
  auto &value_vector = MapVector::GetValues(result);
82
- auto list_data = ListVector::GetData(result);
106
+ auto result_data = ListVector::GetData(result);
83
107
 
108
+ result.SetVectorType(VectorType::CONSTANT_VECTOR);
84
109
  if (args.data.empty()) {
85
110
  ListVector::SetListSize(result, 0);
86
- list_data->offset = 0;
87
- list_data->length = 0;
111
+ result_data->offset = 0;
112
+ result_data->length = 0;
88
113
  result.Verify(args.size());
89
114
  return;
90
115
  }
91
116
 
92
- auto args_data = ListVector::GetData(args.data[0]);
117
+ bool keys_are_const = args.data[0].GetVectorType() == VectorType::CONSTANT_VECTOR;
118
+ bool values_are_const = args.data[1].GetVectorType() == VectorType::CONSTANT_VECTOR;
119
+ if (!keys_are_const || !values_are_const) {
120
+ result.SetVectorType(VectorType::FLAT_VECTOR);
121
+ }
122
+
93
123
  auto key_count = ListVector::GetListSize(args.data[0]);
94
124
  auto value_count = ListVector::GetListSize(args.data[1]);
95
- if (key_count != value_count) {
96
- throw InvalidInputException(
97
- "Error in MAP creation: key list has a different size from value list (%lld keys, %lld values)", key_count,
98
- value_count);
125
+ auto key_data = ListVector::GetData(args.data[0]);
126
+ auto value_data = ListVector::GetData(args.data[1]);
127
+ auto src_data = key_data;
128
+
129
+ if (keys_are_const && !values_are_const) {
130
+ AlignVectorToReference(args.data[0], args.data[1], args.size(), key_vector);
131
+ src_data = value_data;
132
+ } else if (values_are_const && !keys_are_const) {
133
+ AlignVectorToReference(args.data[1], args.data[0], args.size(), value_vector);
134
+ } else {
135
+ if (key_count != value_count || memcmp(key_data, value_data, args.size() * sizeof(list_entry_t)) != 0) {
136
+ throw InvalidInputException("Error in MAP creation: key list and value list do not align. i.e. different "
137
+ "size or incompatible structure");
138
+ }
99
139
  }
100
- ListVector::Reserve(result, key_count);
101
- ListVector::SetListSize(result, key_count);
102
140
 
141
+ ListVector::SetListSize(result, MaxValue(key_count, value_count));
142
+
143
+ result_data = ListVector::GetData(result);
103
144
  for (idx_t i = 0; i < args.size(); i++) {
104
- list_data[i] = args_data[i];
145
+ result_data[i] = src_data[i];
146
+ }
147
+
148
+ // check whether one of the vectors has already been referenced to an expanded vector in the case of const/non-const
149
+ // combination. If not, then referencing is still necessary
150
+ if (!(keys_are_const && !values_are_const)) {
151
+ key_vector.Reference(ListVector::GetEntry(args.data[0]));
152
+ }
153
+ if (!(values_are_const && !keys_are_const)) {
154
+ value_vector.Reference(ListVector::GetEntry(args.data[1]));
105
155
  }
106
156
 
107
- key_vector.Reference(ListVector::GetEntry(args.data[0]));
108
- value_vector.Reference(ListVector::GetEntry(args.data[1]));
109
157
  MapConversionVerify(result, args.size());
110
158
  result.Verify(args.size());
111
159
  }
@@ -220,7 +220,7 @@ bool LikeOperatorFunction(string_t &s, string_t &pat, char escape) {
220
220
  return LikeOperatorFunction(s.GetDataUnsafe(), s.GetSize(), pat.GetDataUnsafe(), pat.GetSize(), escape);
221
221
  }
222
222
 
223
- bool LikeFun::Glob(const char *string, idx_t slen, const char *pattern, idx_t plen) {
223
+ bool LikeFun::Glob(const char *string, idx_t slen, const char *pattern, idx_t plen, bool allow_question_mark) {
224
224
  idx_t sidx = 0;
225
225
  idx_t pidx = 0;
226
226
  main_loop : {
@@ -249,8 +249,11 @@ main_loop : {
249
249
  return false;
250
250
  }
251
251
  case '?':
252
- // wildcard: matches anything but null
253
- break;
252
+ // when enabled: matches anything but null
253
+ if (allow_question_mark) {
254
+ break;
255
+ }
256
+ DUCKDB_EXPLICIT_FALLTHROUGH;
254
257
  case '[':
255
258
  pidx++;
256
259
  goto parse_bracket;
@@ -28,10 +28,7 @@ unique_ptr<CSVFileHandle> ReadCSV::OpenCSV(const string &file_path, FileCompress
28
28
  void ReadCSVData::InitializeFiles(ClientContext &context, const vector<string> &patterns) {
29
29
  auto &fs = FileSystem::GetFileSystem(context);
30
30
  for (auto &file_pattern : patterns) {
31
- auto found_files = fs.Glob(file_pattern, context);
32
- if (found_files.empty()) {
33
- throw FileSystem::MissingFileException(file_pattern, context);
34
- }
31
+ auto found_files = fs.GlobFiles(file_pattern, context);
35
32
  files.insert(files.end(), found_files.begin(), found_files.end());
36
33
  }
37
34
  }
@@ -99,6 +96,17 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
99
96
  if (names.empty()) {
100
97
  throw BinderException("read_csv requires at least a single column as input!");
101
98
  }
99
+ } else if (loption == "column_names" || loption == "names") {
100
+ if (!options.name_list.empty()) {
101
+ throw BinderException("read_csv_auto column_names/names can only be supplied once");
102
+ }
103
+ if (kv.second.IsNull()) {
104
+ throw BinderException("read_csv_auto %s cannot be NULL", kv.first);
105
+ }
106
+ auto &children = ListValue::GetChildren(kv.second);
107
+ for (auto &child : children) {
108
+ options.name_list.push_back(StringValue::Get(child));
109
+ }
102
110
  } else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
103
111
  auto &child_type = kv.second.type();
104
112
  if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
@@ -301,7 +309,7 @@ public:
301
309
  progress = double(bytes_read) / double(file_size);
302
310
  }
303
311
  // now get the total percentage of files read
304
- double percentage = double(file_index) / total_files;
312
+ double percentage = double(file_index - 1) / total_files;
305
313
  percentage += (double(1) / double(total_files)) * progress;
306
314
  return percentage * 100;
307
315
  }
@@ -573,6 +581,7 @@ struct SingleThreadedCSVState : public GlobalTableFunctionState {
573
581
  {
574
582
  lock_guard<mutex> l(csv_lock);
575
583
  if (initial_reader) {
584
+ total_size = initial_reader->file_handle ? initial_reader->file_handle->FileSize() : 0;
576
585
  return std::move(initial_reader);
577
586
  }
578
587
  if (next_file >= total_files) {
@@ -961,6 +970,8 @@ TableFunction ReadCSVTableFunction::GetAutoFunction(bool list_parameter) {
961
970
  read_csv_auto.named_parameters["column_types"] = LogicalType::ANY;
962
971
  read_csv_auto.named_parameters["dtypes"] = LogicalType::ANY;
963
972
  read_csv_auto.named_parameters["types"] = LogicalType::ANY;
973
+ read_csv_auto.named_parameters["names"] = LogicalType::LIST(LogicalType::VARCHAR);
974
+ read_csv_auto.named_parameters["column_names"] = LogicalType::LIST(LogicalType::VARCHAR);
964
975
  return read_csv_auto;
965
976
  }
966
977
 
@@ -0,0 +1,59 @@
1
+ #include "duckdb/function/table/system_functions.hpp"
2
+ #include "duckdb/storage/buffer_manager.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ struct DuckDBTemporaryFilesData : public GlobalTableFunctionState {
7
+ DuckDBTemporaryFilesData() : offset(0) {
8
+ }
9
+
10
+ vector<TemporaryFileInformation> entries;
11
+ idx_t offset;
12
+ };
13
+
14
+ static unique_ptr<FunctionData> DuckDBTemporaryFilesBind(ClientContext &context, TableFunctionBindInput &input,
15
+ vector<LogicalType> &return_types, vector<string> &names) {
16
+ names.emplace_back("path");
17
+ return_types.emplace_back(LogicalType::VARCHAR);
18
+
19
+ names.emplace_back("size");
20
+ return_types.emplace_back(LogicalType::BIGINT);
21
+
22
+ return nullptr;
23
+ }
24
+
25
+ unique_ptr<GlobalTableFunctionState> DuckDBTemporaryFilesInit(ClientContext &context, TableFunctionInitInput &input) {
26
+ auto result = make_unique<DuckDBTemporaryFilesData>();
27
+
28
+ result->entries = BufferManager::GetBufferManager(context).GetTemporaryFiles();
29
+ return std::move(result);
30
+ }
31
+
32
+ void DuckDBTemporaryFilesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
33
+ auto &data = (DuckDBTemporaryFilesData &)*data_p.global_state;
34
+ if (data.offset >= data.entries.size()) {
35
+ // finished returning values
36
+ return;
37
+ }
38
+ // start returning values
39
+ // either fill up the chunk or return all the remaining columns
40
+ idx_t count = 0;
41
+ while (data.offset < data.entries.size() && count < STANDARD_VECTOR_SIZE) {
42
+ auto &entry = data.entries[data.offset++];
43
+ // return values:
44
+ idx_t col = 0;
45
+ // database_name, VARCHAR
46
+ output.SetValue(col++, count, entry.path);
47
+ // database_oid, BIGINT
48
+ output.SetValue(col++, count, Value::BIGINT(entry.size));
49
+ count++;
50
+ }
51
+ output.SetCardinality(count);
52
+ }
53
+
54
+ void DuckDBTemporaryFilesFun::RegisterFunction(BuiltinFunctions &set) {
55
+ set.AddFunction(TableFunction("duckdb_temporary_files", {}, DuckDBTemporaryFilesFunction, DuckDBTemporaryFilesBind,
56
+ DuckDBTemporaryFilesInit));
57
+ }
58
+
59
+ } // namespace duckdb
@@ -29,6 +29,7 @@ void BuiltinFunctions::RegisterSQLiteFunctions() {
29
29
  DuckDBSequencesFun::RegisterFunction(*this);
30
30
  DuckDBSettingsFun::RegisterFunction(*this);
31
31
  DuckDBTablesFun::RegisterFunction(*this);
32
+ DuckDBTemporaryFilesFun::RegisterFunction(*this);
32
33
  DuckDBTypesFun::RegisterFunction(*this);
33
34
  DuckDBViewsFun::RegisterFunction(*this);
34
35
  TestAllTypesFun::RegisterFunction(*this);
@@ -287,9 +287,12 @@ void TableScanPushdownComplexFilter(ClientContext &context, LogicalGet &get, Fun
287
287
  // behold
288
288
  storage.info->indexes.Scan([&](Index &index) {
289
289
  // first rewrite the index expression so the ColumnBindings align with the column bindings of the current table
290
+
290
291
  if (index.unbound_expressions.size() > 1) {
292
+ // NOTE: index scans are not (yet) supported for compound index keys
291
293
  return false;
292
294
  }
295
+
293
296
  auto index_expression = index.unbound_expressions[0]->Copy();
294
297
  bool rewrite_possible = true;
295
298
  RewriteIndexExpression(index, get, *index_expression, rewrite_possible);
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.7.1-dev90"
2
+ #define DUCKDB_VERSION "0.7.1"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "1b87fb94ca"
5
+ #define DUCKDB_SOURCE_ID "b00b93f0b1"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -29,6 +29,7 @@ struct CreateFunctionInfo;
29
29
  struct CreateViewInfo;
30
30
  struct CreateSequenceInfo;
31
31
  struct CreateCollationInfo;
32
+ struct CreateIndexInfo;
32
33
  struct CreateTypeInfo;
33
34
  struct CreateTableInfo;
34
35
  struct DatabaseSize;
@@ -137,6 +138,9 @@ public:
137
138
  //! Creates a collation in the catalog
138
139
  DUCKDB_API CatalogEntry *CreateCollation(CatalogTransaction transaction, CreateCollationInfo *info);
139
140
  DUCKDB_API CatalogEntry *CreateCollation(ClientContext &context, CreateCollationInfo *info);
141
+ //! Creates an index in the catalog
142
+ DUCKDB_API CatalogEntry *CreateIndex(CatalogTransaction transaction, CreateIndexInfo *info);
143
+ DUCKDB_API CatalogEntry *CreateIndex(ClientContext &context, CreateIndexInfo *info);
140
144
 
141
145
  //! Creates a table in the catalog.
142
146
  DUCKDB_API CatalogEntry *CreateTable(CatalogTransaction transaction, SchemaCatalogEntry *schema,
@@ -153,7 +157,7 @@ public:
153
157
  //! Create a scalar or aggregate function in the catalog
154
158
  DUCKDB_API CatalogEntry *CreateFunction(CatalogTransaction transaction, SchemaCatalogEntry *schema,
155
159
  CreateFunctionInfo *info);
156
- //! Creates a table in the catalog.
160
+ //! Creates a view in the catalog
157
161
  DUCKDB_API CatalogEntry *CreateView(CatalogTransaction transaction, SchemaCatalogEntry *schema,
158
162
  CreateViewInfo *info);
159
163
  //! Creates a table in the catalog.
@@ -251,6 +255,8 @@ public:
251
255
 
252
256
  virtual void Verify();
253
257
 
258
+ static CatalogException UnrecognizedConfigurationError(ClientContext &context, const string &name);
259
+
254
260
  protected:
255
261
  //! Reference to the database
256
262
  AttachedDatabase &db;
@@ -1,7 +1,7 @@
1
1
  //===----------------------------------------------------------------------===//
2
2
  // DuckDB
3
3
  //
4
- // duckdb/catalog/catalog_entry/dindex_catalog_entry.hpp
4
+ // duckdb/catalog/catalog_entry/duck_index_entry.hpp
5
5
  //
6
6
  //
7
7
  //===----------------------------------------------------------------------===//
@@ -34,7 +34,7 @@ public:
34
34
 
35
35
  public:
36
36
  string ToSQL() override;
37
- void Serialize(duckdb::MetaBlockWriter &serializer);
37
+ void Serialize(Serializer &serializer);
38
38
  static unique_ptr<CreateIndexInfo> Deserialize(Deserializer &source, ClientContext &context);
39
39
 
40
40
  virtual string GetSchemaName() = 0;
@@ -17,5 +17,7 @@ class Value;
17
17
  Value ConvertVectorToValue(vector<Value> set);
18
18
  vector<bool> ParseColumnList(const vector<Value> &set, vector<string> &names, const string &option_name);
19
19
  vector<bool> ParseColumnList(const Value &value, vector<string> &names, const string &option_name);
20
+ vector<idx_t> ParseColumnsOrdered(const vector<Value> &set, vector<string> &names, const string &loption);
21
+ vector<idx_t> ParseColumnsOrdered(const Value &value, vector<string> &names, const string &loption);
20
22
 
21
23
  } // namespace duckdb
@@ -48,7 +48,7 @@ enum class StatementType : uint8_t {
48
48
 
49
49
  };
50
50
 
51
- string StatementTypeToString(StatementType type);
51
+ DUCKDB_API string StatementTypeToString(StatementType type);
52
52
 
53
53
  enum class StatementReturnType : uint8_t {
54
54
  QUERY_RESULT, // the statement returns a query result (e.g. for display to the user)
@@ -41,6 +41,9 @@ enum class WALType : uint8_t {
41
41
  CREATE_TABLE_MACRO = 21,
42
42
  DROP_TABLE_MACRO = 22,
43
43
 
44
+ CREATE_INDEX = 23,
45
+ DROP_INDEX = 24,
46
+
44
47
  // -----------------------------
45
48
  // Data
46
49
  // -----------------------------
@@ -188,6 +188,7 @@ public:
188
188
  //! Runs a glob on the file system, returning a list of matching files
189
189
  DUCKDB_API virtual vector<string> Glob(const string &path, FileOpener *opener = nullptr);
190
190
  DUCKDB_API virtual vector<string> Glob(const string &path, ClientContext &context);
191
+ DUCKDB_API vector<string> GlobFiles(const string &path, ClientContext &context);
191
192
 
192
193
  //! registers a sub-file system to handle certain file name prefixes, e.g. http:// etc.
193
194
  DUCKDB_API virtual void RegisterSubSystem(unique_ptr<FileSystem> sub_fs);
@@ -201,7 +202,6 @@ public:
201
202
 
202
203
  //! Whether or not a sub-system can handle a specific file path
203
204
  DUCKDB_API virtual bool CanHandleFile(const string &fpath);
204
- DUCKDB_API static IOException MissingFileException(const string &file_path, ClientContext &context);
205
205
 
206
206
  //! Set the file pointer of a file handle to a specified location. Reads and writes will happen from this location
207
207
  DUCKDB_API virtual void Seek(FileHandle &handle, idx_t location);
@@ -52,7 +52,15 @@ struct HivePartitionKey {
52
52
 
53
53
  struct Equality {
54
54
  bool operator()(const HivePartitionKey &a, const HivePartitionKey &b) const {
55
- return a.values == b.values;
55
+ if (a.values.size() != b.values.size()) {
56
+ return false;
57
+ }
58
+ for (idx_t i = 0; i < a.values.size(); i++) {
59
+ if (!Value::NotDistinctFrom(a.values[i], b.values[i])) {
60
+ return false;
61
+ }
62
+ }
63
+ return true;
56
64
  }
57
65
  };
58
66
  };
@@ -78,13 +78,13 @@ protected:
78
78
  case 2:
79
79
  case 3:
80
80
  case 4:
81
- return GetBufferSize(1);
81
+ return GetBufferSize(1 << 1);
82
82
  case 5:
83
- return GetBufferSize(2);
83
+ return GetBufferSize(1 << 2);
84
84
  case 6:
85
- return GetBufferSize(3);
85
+ return GetBufferSize(1 << 3);
86
86
  default:
87
- return GetBufferSize(4);
87
+ return GetBufferSize(1 << 4);
88
88
  }
89
89
  }
90
90
  void InitializeAppendStateInternal(PartitionedColumnDataAppendState &state) const override;
@@ -145,8 +145,15 @@ public:
145
145
  DUCKDB_API static string Replace(string source, const string &from, const string &to);
146
146
 
147
147
  //! Get the levenshtein distance from two strings
148
- DUCKDB_API static idx_t LevenshteinDistance(const string &s1, const string &s2);
149
-
148
+ //! The not_equal_penalty is the penalty given when two characters in a string are not equal
149
+ //! The regular levenshtein distance has a not equal penalty of 1, which means changing a character is as expensive
150
+ //! as adding or removing one For similarity searches we often want to give extra weight to changing a character For
151
+ //! example: with an equal penalty of 1, "pg_am" is closer to "depdelay" than "depdelay_minutes"
152
+ //! with an equal penalty of 3, "depdelay_minutes" is closer to "depdelay" than to "pg_am"
153
+ DUCKDB_API static idx_t LevenshteinDistance(const string &s1, const string &s2, idx_t not_equal_penalty = 1);
154
+
155
+ //! Returns the similarity score between two strings
156
+ DUCKDB_API static idx_t SimilarityScore(const string &s1, const string &s2);
150
157
  //! Get the top-n strings (sorted by the given score distance) from a set of scores.
151
158
  //! At least one entry is returned (if there is one).
152
159
  //! Strings are only returned if they have a score less than the threshold.
@@ -29,24 +29,21 @@ namespace duckdb {
29
29
  class ConflictManager;
30
30
 
31
31
  struct ARTIndexScanState : public IndexScanState {
32
- ARTIndexScanState() : checked(false), result_index(0) {
33
- }
34
32
 
33
+ //! Scan predicates (single predicate scan or range scan)
35
34
  Value values[2];
35
+ //! Expressions of the scan predicates
36
36
  ExpressionType expressions[2];
37
- bool checked;
37
+ bool checked = false;
38
+ //! All scanned row IDs
38
39
  vector<row_t> result_ids;
39
40
  Iterator iterator;
40
- //! Stores the current leaf
41
- Leaf *cur_leaf = nullptr;
42
- //! Offset to leaf
43
- idx_t result_index = 0;
44
41
  };
45
42
 
46
43
  enum class VerifyExistenceType : uint8_t {
47
- APPEND = 0, // for purpose to append into table
48
- APPEND_FK = 1, // for purpose to append into table has foreign key
49
- DELETE_FK = 2 // for purpose to delete from table related to foreign key
44
+ APPEND = 0, // appends to a table
45
+ APPEND_FK = 1, // appends to a table that has a foreign key
46
+ DELETE_FK = 2 // delete from a table that has a foreign key
50
47
  };
51
48
 
52
49
  class ART : public Index {
@@ -62,47 +59,44 @@ public:
62
59
  Node *tree;
63
60
 
64
61
  public:
65
- //! Initialize a scan on the index with the given expression and column ids
66
- //! to fetch from the base table for a single predicate
62
+ //! Initialize a single predicate scan on the index with the given expression and column IDs
67
63
  unique_ptr<IndexScanState> InitializeScanSinglePredicate(const Transaction &transaction, const Value &value,
68
64
  ExpressionType expression_type) override;
69
-
70
- //! Initialize a scan on the index with the given expression and column ids
71
- //! to fetch from the base table for two predicates
65
+ //! Initialize a two predicate scan on the index with the given expression and column IDs
72
66
  unique_ptr<IndexScanState> InitializeScanTwoPredicates(Transaction &transaction, const Value &low_value,
73
67
  ExpressionType low_expression_type, const Value &high_value,
74
68
  ExpressionType high_expression_type) override;
75
-
76
- //! Perform a lookup on the index
69
+ //! Performs a lookup on the index, fetching up to max_count result IDs. Returns true if all row IDs were fetched,
70
+ //! and false otherwise
77
71
  bool Scan(Transaction &transaction, DataTable &table, IndexScanState &state, idx_t max_count,
78
72
  vector<row_t> &result_ids) override;
79
- //! Append entries to the index
73
+
74
+ //! Called when data is appended to the index. The lock obtained from InitializeLock must be held
80
75
  bool Append(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
81
- //! Verify that data can be appended to the index
76
+ //! Verify that data can be appended to the index without a constraint violation
82
77
  void VerifyAppend(DataChunk &chunk) override;
83
- //! Verify that data can be appended to the index
78
+ //! Verify that data can be appended to the index without a constraint violation using the conflict manager
84
79
  void VerifyAppend(DataChunk &chunk, ConflictManager &conflict_manager) override;
85
- //! Verify that data can be appended to the index for foreign key constraint
86
- void VerifyAppendForeignKey(DataChunk &chunk) override;
87
- //! Verify that data can be delete from the index for foreign key constraint
88
- void VerifyDeleteForeignKey(DataChunk &chunk) override;
89
- //! Delete entries in the index
80
+ //! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held
90
81
  void Delete(IndexLock &lock, DataChunk &entries, Vector &row_identifiers) override;
91
- //! Insert data into the index
82
+ //! Insert a chunk of entries into the index
92
83
  bool Insert(IndexLock &lock, DataChunk &data, Vector &row_ids) override;
93
84
 
94
85
  //! Construct an ART from a vector of sorted keys
95
86
  bool ConstructFromSorted(idx_t count, vector<Key> &keys, Vector &row_identifiers);
96
87
 
97
- //! Search Equal and fetches the row IDs
88
+ //! Searches for equal values and fetches the row IDs
98
89
  bool SearchEqual(Key &key, idx_t max_count, vector<row_t> &result_ids);
99
- //! Search Equal used for Joins that do not need to fetch data
90
+ //! Searches for equal values; used for joins that do not need to fetch data
100
91
  void SearchEqualJoinNoFetch(Key &key, idx_t &result_size);
101
- //! Serialized the ART
92
+
93
+ //! Serializes the index and returns the pair of block_id offset positions
102
94
  BlockPointer Serialize(duckdb::MetaBlockWriter &writer) override;
103
95
 
104
- //! Merge two ARTs
96
+ //! Merge another index into this index. The lock obtained from InitializeLock must be held, and the other
97
+ //! index must also be locked during the merge
105
98
  bool MergeIndexes(IndexLock &state, Index *other_index) override;
99
+
106
100
  //! Generate ART keys for an input chunk
107
101
  static void GenerateKeys(ArenaAllocator &allocator, DataChunk &input, vector<Key> &keys);
108
102
 
@@ -110,30 +104,32 @@ public:
110
104
  string GenerateErrorKeyName(DataChunk &input, idx_t row);
111
105
  //! Generate the matching error message for a constraint violation
112
106
  string GenerateConstraintErrorMessage(VerifyExistenceType verify_type, const string &key_name);
107
+ //! Performs constraint checking for a chunk of input data
108
+ void CheckConstraintsForChunk(DataChunk &input, ConflictManager &conflict_manager) override;
113
109
 
114
110
  //! Returns the string representation of an ART
115
111
  string ToString() override;
116
- //! Verifies that the memory_size value of the ART matches its actual size
112
+ //! Verifies that the in-memory size value of the index matches its actual size
117
113
  void Verify() override;
114
+ //! Increases the memory size by the difference between the old size and the current size
115
+ //! and performs verifications
116
+ void IncreaseAndVerifyMemorySize(idx_t old_memory_size) override;
118
117
 
119
118
  private:
120
- //! Insert a row id into a leaf node
119
+ //! Insert a row ID into a leaf
121
120
  bool InsertToLeaf(Leaf &leaf, row_t row_id);
122
- //! Insert the leaf value into the tree
121
+ //! Insert a key into the tree
123
122
  bool Insert(Node *&node, Key &key, idx_t depth, row_t row_id);
124
-
125
- //! Erase element from leaf (if leaf has more than one value) or eliminate the leaf itself
123
+ //! Erase a key from the tree (if a leaf has more than one value) or erase the leaf itself
126
124
  void Erase(Node *&node, Key &key, idx_t depth, row_t row_id);
127
-
128
- //! Perform 'Lookup' for an entire chunk, marking which succeeded
129
- void LookupValues(DataChunk &input, ConflictManager &conflict_manager) final override;
130
-
131
- //! Find the node with a matching key, optimistic version
125
+ //! Find the node with a matching key, or return nullptr if not found
132
126
  Leaf *Lookup(Node *node, Key &key, idx_t depth);
133
-
127
+ //! Returns all row IDs belonging to a key greater than (or equal to) the search key
134
128
  bool SearchGreater(ARTIndexScanState *state, Key &key, bool inclusive, idx_t max_count, vector<row_t> &result_ids);
129
+ //! Returns all row IDs belonging to a key less than (or equal to) the upper_bound
135
130
  bool SearchLess(ARTIndexScanState *state, Key &upper_bound, bool inclusive, idx_t max_count,
136
131
  vector<row_t> &result_ids);
132
+ //! Returns all row IDs belonging to a key within the range of lower_bound and upper_bound
137
133
  bool SearchCloseRange(ARTIndexScanState *state, Key &lower_bound, Key &upper_bound, bool left_inclusive,
138
134
  bool right_inclusive, idx_t max_count, vector<row_t> &result_ids);
139
135
  };
@@ -28,24 +28,24 @@ public:
28
28
 
29
29
  public:
30
30
  template <class T>
31
- static inline Key CreateKey(ArenaAllocator &allocator, T element) {
31
+ static inline Key CreateKey(ArenaAllocator &allocator, const LogicalType &type, T element) {
32
32
  auto data = Key::CreateData<T>(allocator, element);
33
33
  return Key(data, sizeof(element));
34
34
  }
35
35
 
36
36
  template <class T>
37
- static inline Key CreateKey(ArenaAllocator &allocator, const Value &element) {
38
- return CreateKey(allocator, element.GetValueUnsafe<T>());
37
+ static inline Key CreateKey(ArenaAllocator &allocator, const LogicalType &type, const Value &element) {
38
+ return CreateKey(allocator, type, element.GetValueUnsafe<T>());
39
39
  }
40
40
 
41
41
  template <class T>
42
- static inline void CreateKey(ArenaAllocator &allocator, Key &key, T element) {
42
+ static inline void CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, T element) {
43
43
  key.data = Key::CreateData<T>(allocator, element);
44
44
  key.len = sizeof(element);
45
45
  }
46
46
 
47
47
  template <class T>
48
- static inline void CreateKey(ArenaAllocator &allocator, Key &key, const Value element) {
48
+ static inline void CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, const Value element) {
49
49
  key.data = Key::CreateData<T>(allocator, element.GetValueUnsafe<T>());
50
50
  key.len = sizeof(element);
51
51
  }
@@ -76,12 +76,9 @@ private:
76
76
  };
77
77
 
78
78
  template <>
79
- Key Key::CreateKey(ArenaAllocator &allocator, string_t value);
79
+ Key Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, string_t value);
80
80
  template <>
81
- Key Key::CreateKey(ArenaAllocator &allocator, const char *value);
81
+ Key Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, const char *value);
82
82
  template <>
83
- void Key::CreateKey(ArenaAllocator &allocator, Key &key, string_t value);
84
- template <>
85
- void Key::CreateKey(ArenaAllocator &allocator, Key &key, const char *value);
86
-
83
+ void Key::CreateKey(ArenaAllocator &allocator, const LogicalType &type, Key &key, string_t value);
87
84
  } // namespace duckdb
@@ -75,6 +75,8 @@ struct BufferedCSVReaderOptions {
75
75
  case_insensitive_map_t<idx_t> sql_types_per_column;
76
76
  //! User-defined SQL type list
77
77
  vector<LogicalType> sql_type_list;
78
+ //! User-defined name list
79
+ vector<string> name_list;
78
80
  //===--------------------------------------------------------------------===//
79
81
  // ReadCSVOptions
80
82
  //===--------------------------------------------------------------------===//