duckdb 0.8.2-dev4514.0 → 0.8.2-dev4623.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. package/lib/duckdb.js +11 -1
  2. package/package.json +3 -1
  3. package/src/connection.cpp +48 -7
  4. package/src/duckdb/src/catalog/catalog.cpp +5 -0
  5. package/src/duckdb/src/catalog/duck_catalog.cpp +4 -0
  6. package/src/duckdb/src/common/enum_util.cpp +24 -0
  7. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +213 -2
  8. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +59 -38
  9. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
  10. package/src/duckdb/src/function/table/arrow.cpp +18 -13
  11. package/src/duckdb/src/function/table/read_csv.cpp +3 -130
  12. package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +83 -0
  13. package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +5 -0
  14. package/src/duckdb/src/function/table/system_functions.cpp +1 -0
  15. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  16. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
  17. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -0
  18. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +1 -1
  19. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  20. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +36 -0
  21. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +24 -0
  22. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +36 -4
  23. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
  24. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
  25. package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -1
  26. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +10 -4
  27. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +3 -3
  28. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
  29. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +27 -4
  30. package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +4 -2
  31. package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +22 -1
  32. package/src/duckdb/src/include/duckdb/storage/database_size.hpp +6 -0
  33. package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
  34. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -0
  35. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +6 -1
  36. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -3
  37. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
  38. package/src/duckdb/src/main/connection.cpp +4 -6
  39. package/src/duckdb/src/main/extension/extension_install.cpp +2 -1
  40. package/src/duckdb/src/main/relation/read_csv_relation.cpp +28 -9
  41. package/src/duckdb/src/main/relation/table_function_relation.cpp +8 -2
  42. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +1 -4
  43. package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -4
  44. package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +47 -10
  45. package/src/duckdb/src/storage/checkpoint_manager.cpp +0 -2
  46. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +6 -1
  47. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +62 -12
  48. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -1
  49. package/src/duckdb/src/storage/data_pointer.cpp +20 -0
  50. package/src/duckdb/src/storage/local_storage.cpp +3 -7
  51. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -15
  52. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +4 -0
  53. package/src/duckdb/src/storage/single_file_block_manager.cpp +15 -9
  54. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  55. package/src/duckdb/src/storage/storage_manager.cpp +5 -0
  56. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -0
  57. package/src/duckdb/src/storage/table/column_data.cpp +17 -14
  58. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -8
  59. package/src/duckdb/src/storage/table/column_segment.cpp +21 -12
  60. package/src/duckdb/ub_src_function_table_system.cpp +2 -0
  61. package/src/duckdb/ub_src_storage.cpp +2 -0
  62. package/src/duckdb_node.hpp +1 -0
  63. package/test/close_hang.test.ts +39 -0
@@ -85,25 +85,6 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
85
85
  }
86
86
  }
87
87
 
88
- uint8_t GetCandidateSpecificity(const LogicalType &candidate_type) {
89
- //! Const ht with accepted auto_types and their weights in specificity
90
- const duckdb::unordered_map<uint8_t, uint8_t> auto_type_candidates_specificity {
91
- {(uint8_t)LogicalTypeId::VARCHAR, 0}, {(uint8_t)LogicalTypeId::TIMESTAMP, 1},
92
- {(uint8_t)LogicalTypeId::DATE, 2}, {(uint8_t)LogicalTypeId::TIME, 3},
93
- {(uint8_t)LogicalTypeId::DOUBLE, 4}, {(uint8_t)LogicalTypeId::FLOAT, 5},
94
- {(uint8_t)LogicalTypeId::BIGINT, 6}, {(uint8_t)LogicalTypeId::INTEGER, 7},
95
- {(uint8_t)LogicalTypeId::SMALLINT, 8}, {(uint8_t)LogicalTypeId::TINYINT, 9},
96
- {(uint8_t)LogicalTypeId::BOOLEAN, 10}, {(uint8_t)LogicalTypeId::SQLNULL, 11}};
97
-
98
- auto id = (uint8_t)candidate_type.id();
99
- auto it = auto_type_candidates_specificity.find(id);
100
- if (it == auto_type_candidates_specificity.end()) {
101
- throw BinderException("Auto Type Candidate of type %s is not accepted as a valid input",
102
- EnumUtil::ToString(candidate_type.id()));
103
- }
104
- return it->second;
105
- }
106
-
107
88
  static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctionBindInput &input,
108
89
  vector<LogicalType> &return_types, vector<string> &names) {
109
90
 
@@ -111,117 +92,9 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
111
92
  auto &options = result->options;
112
93
  result->files = MultiFileReader::GetFileList(context, input.inputs[0], "CSV");
113
94
 
114
- bool explicitly_set_columns = false;
115
- for (auto &kv : input.named_parameters) {
116
- if (MultiFileReader::ParseOption(kv.first, kv.second, options.file_options, context)) {
117
- continue;
118
- }
119
- auto loption = StringUtil::Lower(kv.first);
120
- if (loption == "columns") {
121
- explicitly_set_columns = true;
122
- auto &child_type = kv.second.type();
123
- if (child_type.id() != LogicalTypeId::STRUCT) {
124
- throw BinderException("read_csv columns requires a struct as input");
125
- }
126
- auto &struct_children = StructValue::GetChildren(kv.second);
127
- D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
128
- for (idx_t i = 0; i < struct_children.size(); i++) {
129
- auto &name = StructType::GetChildName(child_type, i);
130
- auto &val = struct_children[i];
131
- names.push_back(name);
132
- if (val.type().id() != LogicalTypeId::VARCHAR) {
133
- throw BinderException("read_csv requires a type specification as string");
134
- }
135
- return_types.emplace_back(TransformStringToLogicalType(StringValue::Get(val), context));
136
- }
137
- if (names.empty()) {
138
- throw BinderException("read_csv requires at least a single column as input!");
139
- }
140
- } else if (loption == "auto_type_candidates") {
141
- options.auto_type_candidates.clear();
142
- map<uint8_t, LogicalType> candidate_types;
143
- // We always have the extremes of Null and Varchar, so we can default to varchar if the
144
- // sniffer is not able to confidently detect that column type
145
- candidate_types[GetCandidateSpecificity(LogicalType::VARCHAR)] = LogicalType::VARCHAR;
146
- candidate_types[GetCandidateSpecificity(LogicalType::SQLNULL)] = LogicalType::SQLNULL;
147
-
148
- auto &child_type = kv.second.type();
149
- if (child_type.id() != LogicalTypeId::LIST) {
150
- throw BinderException("read_csv auto_types requires a list as input");
151
- }
152
- auto &list_children = ListValue::GetChildren(kv.second);
153
- if (list_children.empty()) {
154
- throw BinderException("auto_type_candidates requires at least one type");
155
- }
156
- for (auto &child : list_children) {
157
- if (child.type().id() != LogicalTypeId::VARCHAR) {
158
- throw BinderException("auto_type_candidates requires a type specification as string");
159
- }
160
- auto candidate_type = TransformStringToLogicalType(StringValue::Get(child), context);
161
- candidate_types[GetCandidateSpecificity(candidate_type)] = candidate_type;
162
- }
163
- for (auto &candidate_type : candidate_types) {
164
- options.auto_type_candidates.emplace_back(candidate_type.second);
165
- }
166
- } else if (loption == "column_names" || loption == "names") {
167
- if (!options.name_list.empty()) {
168
- throw BinderException("read_csv_auto column_names/names can only be supplied once");
169
- }
170
- if (kv.second.IsNull()) {
171
- throw BinderException("read_csv_auto %s cannot be NULL", kv.first);
172
- }
173
- auto &children = ListValue::GetChildren(kv.second);
174
- for (auto &child : children) {
175
- options.name_list.push_back(StringValue::Get(child));
176
- }
177
- } else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
178
- auto &child_type = kv.second.type();
179
- if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
180
- throw BinderException("read_csv_auto %s requires a struct or list as input", kv.first);
181
- }
182
- if (!options.sql_type_list.empty()) {
183
- throw BinderException("read_csv_auto column_types/types/dtypes can only be supplied once");
184
- }
185
- vector<string> sql_type_names;
186
- if (child_type.id() == LogicalTypeId::STRUCT) {
187
- auto &struct_children = StructValue::GetChildren(kv.second);
188
- D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
189
- for (idx_t i = 0; i < struct_children.size(); i++) {
190
- auto &name = StructType::GetChildName(child_type, i);
191
- auto &val = struct_children[i];
192
- if (val.type().id() != LogicalTypeId::VARCHAR) {
193
- throw BinderException("read_csv_auto %s requires a type specification as string", kv.first);
194
- }
195
- sql_type_names.push_back(StringValue::Get(val));
196
- options.sql_types_per_column[name] = i;
197
- }
198
- } else {
199
- auto &list_child = ListType::GetChildType(child_type);
200
- if (list_child.id() != LogicalTypeId::VARCHAR) {
201
- throw BinderException("read_csv_auto %s requires a list of types (varchar) as input", kv.first);
202
- }
203
- auto &children = ListValue::GetChildren(kv.second);
204
- for (auto &child : children) {
205
- sql_type_names.push_back(StringValue::Get(child));
206
- }
207
- }
208
- options.sql_type_list.reserve(sql_type_names.size());
209
- for (auto &sql_type : sql_type_names) {
210
- auto def_type = TransformStringToLogicalType(sql_type);
211
- if (def_type.id() == LogicalTypeId::USER) {
212
- throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
213
- kv.first);
214
- }
215
- options.sql_type_list.push_back(std::move(def_type));
216
- }
217
- } else if (loption == "all_varchar") {
218
- options.all_varchar = BooleanValue::Get(kv.second);
219
- } else if (loption == "normalize_names") {
220
- options.normalize_names = BooleanValue::Get(kv.second);
221
- } else {
222
- options.SetReadOption(loption, kv.second, names);
223
- }
224
- }
95
+ options.FromNamedParameters(input.named_parameters, context, return_types, names);
96
+ bool explicitly_set_columns = options.explicitly_set_columns;
97
+
225
98
  options.file_options.AutoDetectHivePartitioning(result->files, context);
226
99
 
227
100
  if (!options.auto_detect && return_types.empty()) {
@@ -0,0 +1,83 @@
1
+ #include "duckdb/function/table/system_functions.hpp"
2
+
3
+ #include "duckdb/catalog/catalog.hpp"
4
+ #include "duckdb/storage/database_size.hpp"
5
+ #include "duckdb/main/database_manager.hpp"
6
+ #include "duckdb/function/function_set.hpp"
7
+ namespace duckdb {
8
+
9
+ struct PragmaMetadataFunctionData : public TableFunctionData {
10
+ explicit PragmaMetadataFunctionData() {
11
+ }
12
+
13
+ vector<MetadataBlockInfo> metadata_info;
14
+ };
15
+
16
+ struct PragmaMetadataOperatorData : public GlobalTableFunctionState {
17
+ PragmaMetadataOperatorData() : offset(0) {
18
+ }
19
+
20
+ idx_t offset;
21
+ };
22
+
23
+ static unique_ptr<FunctionData> PragmaMetadataInfoBind(ClientContext &context, TableFunctionBindInput &input,
24
+ vector<LogicalType> &return_types, vector<string> &names) {
25
+ names.emplace_back("block_id");
26
+ return_types.emplace_back(LogicalType::BIGINT);
27
+
28
+ names.emplace_back("total_blocks");
29
+ return_types.emplace_back(LogicalType::BIGINT);
30
+
31
+ names.emplace_back("free_blocks");
32
+ return_types.emplace_back(LogicalType::BIGINT);
33
+
34
+ names.emplace_back("free_list");
35
+ return_types.emplace_back(LogicalType::LIST(LogicalType::BIGINT));
36
+
37
+ string db_name =
38
+ input.inputs.empty() ? DatabaseManager::GetDefaultDatabase(context) : StringValue::Get(input.inputs[0]);
39
+ auto &catalog = Catalog::GetCatalog(context, db_name);
40
+ auto result = make_uniq<PragmaMetadataFunctionData>();
41
+ result->metadata_info = catalog.GetMetadataInfo(context);
42
+ return std::move(result);
43
+ }
44
+
45
+ unique_ptr<GlobalTableFunctionState> PragmaMetadataInfoInit(ClientContext &context, TableFunctionInitInput &input) {
46
+ return make_uniq<PragmaMetadataOperatorData>();
47
+ }
48
+
49
+ static void PragmaMetadataInfoFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
50
+ auto &bind_data = data_p.bind_data->Cast<PragmaMetadataFunctionData>();
51
+ auto &data = data_p.global_state->Cast<PragmaMetadataOperatorData>();
52
+ idx_t count = 0;
53
+ while (data.offset < bind_data.metadata_info.size() && count < STANDARD_VECTOR_SIZE) {
54
+ auto &entry = bind_data.metadata_info[data.offset++];
55
+
56
+ idx_t col_idx = 0;
57
+ // block_id
58
+ output.SetValue(col_idx++, count, Value::BIGINT(entry.block_id));
59
+ // total_blocks
60
+ output.SetValue(col_idx++, count, Value::BIGINT(entry.total_blocks));
61
+ // free_blocks
62
+ output.SetValue(col_idx++, count, Value::BIGINT(entry.free_list.size()));
63
+ // free_list
64
+ vector<Value> list_values;
65
+ for (auto &free_id : entry.free_list) {
66
+ list_values.push_back(Value::BIGINT(free_id));
67
+ }
68
+ output.SetValue(col_idx++, count, Value::LIST(LogicalType::BIGINT, std::move(list_values)));
69
+ count++;
70
+ }
71
+ output.SetCardinality(count);
72
+ }
73
+
74
+ void PragmaMetadataInfo::RegisterFunction(BuiltinFunctions &set) {
75
+ TableFunctionSet metadata_info("pragma_metadata_info");
76
+ metadata_info.AddFunction(
77
+ TableFunction({}, PragmaMetadataInfoFunction, PragmaMetadataInfoBind, PragmaMetadataInfoInit));
78
+ metadata_info.AddFunction(TableFunction({LogicalType::VARCHAR}, PragmaMetadataInfoFunction, PragmaMetadataInfoBind,
79
+ PragmaMetadataInfoInit));
80
+ set.AddFunction(metadata_info);
81
+ }
82
+
83
+ } // namespace duckdb
@@ -76,6 +76,9 @@ static unique_ptr<FunctionData> PragmaStorageInfoBind(ClientContext &context, Ta
76
76
  names.emplace_back("block_offset");
77
77
  return_types.emplace_back(LogicalType::BIGINT);
78
78
 
79
+ names.emplace_back("segment_info");
80
+ return_types.emplace_back(LogicalType::VARCHAR);
81
+
79
82
  auto qname = QualifiedName::Parse(input.inputs[0].GetValue<string>());
80
83
 
81
84
  // look up the table name in the catalog
@@ -133,6 +136,8 @@ static void PragmaStorageInfoFunction(ClientContext &context, TableFunctionInput
133
136
  output.SetValue(col_idx++, count, Value());
134
137
  output.SetValue(col_idx++, count, Value());
135
138
  }
139
+ // segment_info
140
+ output.SetValue(col_idx++, count, Value(entry.segment_info));
136
141
  count++;
137
142
  }
138
143
  output.SetCardinality(count);
@@ -14,6 +14,7 @@ void BuiltinFunctions::RegisterSQLiteFunctions() {
14
14
  PragmaCollations::RegisterFunction(*this);
15
15
  PragmaTableInfo::RegisterFunction(*this);
16
16
  PragmaStorageInfo::RegisterFunction(*this);
17
+ PragmaMetadataInfo::RegisterFunction(*this);
17
18
  PragmaDatabaseSize::RegisterFunction(*this);
18
19
  PragmaLastProfilingOutput::RegisterFunction(*this);
19
20
  PragmaDetailedProfilingOutput::RegisterFunction(*this);
@@ -1,8 +1,8 @@
1
1
  #ifndef DUCKDB_VERSION
2
- #define DUCKDB_VERSION "0.8.2-dev4514"
2
+ #define DUCKDB_VERSION "0.8.2-dev4623"
3
3
  #endif
4
4
  #ifndef DUCKDB_SOURCE_ID
5
- #define DUCKDB_SOURCE_ID "38c6e8ccce"
5
+ #define DUCKDB_SOURCE_ID "52a47a6b31"
6
6
  #endif
7
7
  #include "duckdb/function/table/system_functions.hpp"
8
8
  #include "duckdb/main/database.hpp"
@@ -34,6 +34,7 @@ struct CreateIndexInfo;
34
34
  struct CreateTypeInfo;
35
35
  struct CreateTableInfo;
36
36
  struct DatabaseSize;
37
+ struct MetadataBlockInfo;
37
38
 
38
39
  class AttachedDatabase;
39
40
  class ClientContext;
@@ -266,6 +267,7 @@ public:
266
267
  unique_ptr<LogicalOperator> plan) = 0;
267
268
 
268
269
  virtual DatabaseSize GetDatabaseSize(ClientContext &context) = 0;
270
+ virtual vector<MetadataBlockInfo> GetMetadataInfo(ClientContext &context);
269
271
 
270
272
  virtual bool InMemory() = 0;
271
273
  virtual string GetDBPath() = 0;
@@ -54,6 +54,7 @@ public:
54
54
  unique_ptr<LogicalOperator> plan) override;
55
55
 
56
56
  DatabaseSize GetDatabaseSize(ClientContext &context) override;
57
+ vector<MetadataBlockInfo> GetMetadataInfo(ClientContext &context) override;
57
58
 
58
59
  DUCKDB_API bool InMemory() override;
59
60
  DUCKDB_API string GetDBPath() override;
@@ -18,7 +18,7 @@ class ColumnDataCollection;
18
18
  class ColumnDataRowCollection;
19
19
 
20
20
  enum class ValueRenderAlignment { LEFT, MIDDLE, RIGHT };
21
- enum class RenderMode { ROWS, COLUMNS };
21
+ enum class RenderMode : uint8_t { ROWS, COLUMNS };
22
22
 
23
23
  struct BoxRendererConfig {
24
24
  // a max_width of 0 means we default to the terminal width
@@ -216,6 +216,8 @@ enum class QuoteRule : uint8_t;
216
216
 
217
217
  enum class RelationType : uint8_t;
218
218
 
219
+ enum class RenderMode : uint8_t;
220
+
219
221
  enum class ResultModifierType : uint8_t;
220
222
 
221
223
  enum class SampleMethod : uint8_t;
@@ -565,6 +567,9 @@ const char* EnumUtil::ToChars<QuoteRule>(QuoteRule value);
565
567
  template<>
566
568
  const char* EnumUtil::ToChars<RelationType>(RelationType value);
567
569
 
570
+ template<>
571
+ const char* EnumUtil::ToChars<RenderMode>(RenderMode value);
572
+
568
573
  template<>
569
574
  const char* EnumUtil::ToChars<ResultModifierType>(ResultModifierType value);
570
575
 
@@ -950,6 +955,9 @@ QuoteRule EnumUtil::FromString<QuoteRule>(const char *value);
950
955
  template<>
951
956
  RelationType EnumUtil::FromString<RelationType>(const char *value);
952
957
 
958
+ template<>
959
+ RenderMode EnumUtil::FromString<RenderMode>(const char *value);
960
+
953
961
  template<>
954
962
  ResultModifierType EnumUtil::FromString<ResultModifierType>(const char *value);
955
963
 
@@ -15,10 +15,12 @@
15
15
  namespace duckdb {
16
16
  class ClientContext;
17
17
  class Catalog;
18
+ class DatabaseInstance;
18
19
  enum class ExpressionType : uint8_t;
19
20
 
20
21
  struct DeserializationData {
21
22
  stack<reference<ClientContext>> contexts;
23
+ stack<reference<DatabaseInstance>> databases;
22
24
  stack<idx_t> enums;
23
25
  stack<reference<bound_parameter_map_t>> parameter_data;
24
26
  stack<reference<LogicalType>> types;
@@ -74,6 +76,23 @@ inline void DeserializationData::Unset<LogicalOperatorType>() {
74
76
  enums.pop();
75
77
  }
76
78
 
79
+ template <>
80
+ inline void DeserializationData::Set(CompressionType type) {
81
+ enums.push(idx_t(type));
82
+ }
83
+
84
+ template <>
85
+ inline CompressionType DeserializationData::Get() {
86
+ AssertNotEmpty(enums);
87
+ return CompressionType(enums.top());
88
+ }
89
+
90
+ template <>
91
+ inline void DeserializationData::Unset<CompressionType>() {
92
+ AssertNotEmpty(enums);
93
+ enums.pop();
94
+ }
95
+
77
96
  template <>
78
97
  inline void DeserializationData::Set(CatalogType type) {
79
98
  enums.push(idx_t(type));
@@ -108,6 +127,23 @@ inline void DeserializationData::Unset<ClientContext>() {
108
127
  contexts.pop();
109
128
  }
110
129
 
130
+ template <>
131
+ inline void DeserializationData::Set(DatabaseInstance &db) {
132
+ databases.push(db);
133
+ }
134
+
135
+ template <>
136
+ inline DatabaseInstance &DeserializationData::Get() {
137
+ AssertNotEmpty(databases);
138
+ return databases.top();
139
+ }
140
+
141
+ template <>
142
+ inline void DeserializationData::Unset<DatabaseInstance>() {
143
+ AssertNotEmpty(databases);
144
+ databases.pop();
145
+ }
146
+
111
147
  template <>
112
148
  inline void DeserializationData::Set(bound_parameter_map_t &context) {
113
149
  parameter_data.push(context);
@@ -159,18 +159,33 @@ struct CSVReaderOptions {
159
159
  string suffix;
160
160
  string write_newline;
161
161
 
162
+ //! The date format to use (if any is specified)
163
+ map<LogicalTypeId, StrpTimeFormat> date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
162
164
  //! The date format to use for writing (if any is specified)
163
165
  map<LogicalTypeId, StrfTimeFormat> write_date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
166
+ //! Whether or not a type format is specified
167
+ map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
164
168
 
165
169
  void Serialize(Serializer &serializer) const;
166
170
  static CSVReaderOptions Deserialize(Deserializer &deserializer);
167
171
 
168
172
  void SetCompression(const string &compression);
173
+
174
+ bool GetHeader() const;
169
175
  void SetHeader(bool has_header);
176
+
177
+ string GetEscape() const;
170
178
  void SetEscape(const string &escape);
179
+
180
+ int64_t GetSkipRows() const;
181
+ void SetSkipRows(int64_t rows);
182
+
183
+ string GetQuote() const;
171
184
  void SetQuote(const string &quote);
172
185
  void SetDelimiter(const string &delimiter);
186
+ string GetDelimiter() const;
173
187
 
188
+ NewLineIdentifier GetNewline() const;
174
189
  void SetNewline(const string &input);
175
190
  //! Set an option that is supported by both reading and writing functions, called by
176
191
  //! the SetReadOption and SetWriteOption methods
@@ -182,7 +197,16 @@ struct CSVReaderOptions {
182
197
  void SetReadOption(const string &loption, const Value &value, vector<string> &expected_names);
183
198
  void SetWriteOption(const string &loption, const Value &value);
184
199
  void SetDateFormat(LogicalTypeId type, const string &format, bool read_format);
200
+ void ToNamedParameters(named_parameter_map_t &out);
201
+ void FromNamedParameters(named_parameter_map_t &in, ClientContext &context, vector<LogicalType> &return_types,
202
+ vector<string> &names);
185
203
 
186
204
  string ToString() const;
205
+
206
+ named_parameter_map_t OutputReadSettings();
207
+
208
+ public:
209
+ //! Whether columns were explicitly provided through named parameters
210
+ bool explicitly_set_columns = false;
187
211
  };
188
212
  } // namespace duckdb
@@ -14,6 +14,7 @@
14
14
  #include "duckdb/common/map.hpp"
15
15
  #include "duckdb/storage/storage_info.hpp"
16
16
  #include "duckdb/common/mutex.hpp"
17
+ #include "duckdb/storage/data_pointer.hpp"
17
18
 
18
19
  namespace duckdb {
19
20
  class DatabaseInstance;
@@ -21,6 +22,7 @@ class ColumnData;
21
22
  class ColumnDataCheckpointer;
22
23
  class ColumnSegment;
23
24
  class SegmentStatistics;
25
+ struct ColumnSegmentState;
24
26
 
25
27
  struct ColumnFetchState;
26
28
  struct ColumnScanState;
@@ -62,6 +64,11 @@ struct CompressedSegmentState {
62
64
  virtual ~CompressedSegmentState() {
63
65
  }
64
66
 
67
+ //! Display info for PRAGMA storage_info
68
+ virtual string GetSegmentInfo() const { // LCOV_EXCL_START
69
+ return "";
70
+ } // LCOV_EXCL_STOP
71
+
65
72
  template <class TARGET>
66
73
  TARGET &Cast() {
67
74
  D_ASSERT(dynamic_cast<TARGET *>(this));
@@ -75,7 +82,7 @@ struct CompressedSegmentState {
75
82
  };
76
83
 
77
84
  struct CompressionAppendState {
78
- CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) {
85
+ explicit CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) {
79
86
  }
80
87
  virtual ~CompressionAppendState() {
81
88
  }
@@ -139,13 +146,24 @@ typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &stat
139
146
  //===--------------------------------------------------------------------===//
140
147
  // Append (optional)
141
148
  //===--------------------------------------------------------------------===//
142
- typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
149
+ typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(
150
+ ColumnSegment &segment, block_id_t block_id, optional_ptr<ColumnSegmentState> segment_state);
143
151
  typedef unique_ptr<CompressionAppendState> (*compression_init_append_t)(ColumnSegment &segment);
144
152
  typedef idx_t (*compression_append_t)(CompressionAppendState &append_state, ColumnSegment &segment,
145
153
  SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count);
146
154
  typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
147
155
  typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
148
156
 
157
+ //===--------------------------------------------------------------------===//
158
+ // Serialization (optional)
159
+ //===--------------------------------------------------------------------===//
160
+ //! Function prototype for serializing the segment state
161
+ typedef unique_ptr<ColumnSegmentState> (*compression_serialize_state_t)(ColumnSegment &segment);
162
+ //! Function prototype for deserializing the segment state
163
+ typedef unique_ptr<ColumnSegmentState> (*compression_deserialize_state_t)(Deserializer &deserializer);
164
+ //! Function prototype for cleaning up the segment state when the column data is dropped
165
+ typedef void (*compression_cleanup_state_t)(ColumnSegment &segment);
166
+
149
167
  class CompressionFunction {
150
168
  public:
151
169
  CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze,
@@ -157,12 +175,16 @@ public:
157
175
  compression_init_segment_t init_segment = nullptr,
158
176
  compression_init_append_t init_append = nullptr, compression_append_t append = nullptr,
159
177
  compression_finalize_append_t finalize_append = nullptr,
160
- compression_revert_append_t revert_append = nullptr)
178
+ compression_revert_append_t revert_append = nullptr,
179
+ compression_serialize_state_t serialize_state = nullptr,
180
+ compression_deserialize_state_t deserialize_state = nullptr,
181
+ compression_cleanup_state_t cleanup_state = nullptr)
161
182
  : type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze),
162
183
  init_compression(init_compression), compress(compress), compress_finalize(compress_finalize),
163
184
  init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip),
164
185
  init_segment(init_segment), init_append(init_append), append(append), finalize_append(finalize_append),
165
- revert_append(revert_append) {
186
+ revert_append(revert_append), serialize_state(serialize_state), deserialize_state(deserialize_state),
187
+ cleanup_state(cleanup_state) {
166
188
  }
167
189
 
168
190
  //! Compression type
@@ -218,6 +240,16 @@ public:
218
240
  compression_finalize_append_t finalize_append;
219
241
  //! Revert append (optional)
220
242
  compression_revert_append_t revert_append;
243
+
244
+ // State serialize functions
245
+ //! This is only necessary if the segment state has information that must be written to disk in the metadata
246
+
247
+ //! Serialize the segment state to the metadata (optional)
248
+ compression_serialize_state_t serialize_state;
249
+ //! Deserialize the segment state to the metadata (optional)
250
+ compression_deserialize_state_t deserialize_state;
251
+ //! Cleanup the segment state (optional)
252
+ compression_cleanup_state_t cleanup_state;
221
253
  };
222
254
 
223
255
  //! The set of compression functions
@@ -129,6 +129,8 @@ public:
129
129
 
130
130
  //! Scan Function
131
131
  static void ArrowScanFunction(ClientContext &context, TableFunctionInput &data, DataChunk &output);
132
+ static void PopulateArrowTableType(ArrowTableType &arrow_table, ArrowSchemaWrapper &schema_p, vector<string> &names,
133
+ vector<LogicalType> &return_types);
132
134
 
133
135
  protected:
134
136
  //! Defines Maximum Number of Threads
@@ -25,6 +25,10 @@ struct PragmaStorageInfo {
25
25
  static void RegisterFunction(BuiltinFunctions &set);
26
26
  };
27
27
 
28
+ struct PragmaMetadataInfo {
29
+ static void RegisterFunction(BuiltinFunctions &set);
30
+ };
31
+
28
32
  struct PragmaLastProfilingOutput {
29
33
  static void RegisterFunction(BuiltinFunctions &set);
30
34
  };
@@ -131,7 +131,7 @@ public:
131
131
 
132
132
  //! Reads CSV file
133
133
  DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file);
134
- DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, CSVReaderOptions &options);
134
+ DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, named_parameter_map_t &&options);
135
135
  DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, const vector<string> &columns);
136
136
 
137
137
  //! Reads Parquet file
@@ -118,6 +118,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
118
118
  {"st_dwithin_spheroid", "spatial"},
119
119
  {"st_envelope", "spatial"},
120
120
  {"st_equals", "spatial"},
121
+ {"st_extent", "spatial"},
121
122
  {"st_flipcoordinates", "spatial"},
122
123
  {"st_geometrytype", "spatial"},
123
124
  {"st_geomfromgeojson", "spatial"},
@@ -126,6 +127,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
126
127
  {"st_geomfromtext", "spatial"},
127
128
  {"st_geomfromwkb", "spatial"},
128
129
  {"st_intersection", "spatial"},
130
+ {"st_intersection_agg", "spatial"},
129
131
  {"st_intersects", "spatial"},
130
132
  {"st_isclosed", "spatial"},
131
133
  {"st_isempty", "spatial"},
@@ -159,9 +161,14 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
159
161
  {"st_touches", "spatial"},
160
162
  {"st_transform", "spatial"},
161
163
  {"st_union", "spatial"},
164
+ {"st_union_agg", "spatial"},
162
165
  {"st_within", "spatial"},
163
166
  {"st_x", "spatial"},
167
+ {"st_xmax", "spatial"},
168
+ {"st_xmin", "spatial"},
164
169
  {"st_y", "spatial"},
170
+ {"st_ymax", "spatial"},
171
+ {"st_ymin", "spatial"},
165
172
  {"stem", "fts"},
166
173
  {"text", "excel"},
167
174
  {"to_arrow_ipc", "arrow"},
@@ -220,10 +227,9 @@ static constexpr ExtensionEntry EXTENSION_FILE_PREFIXES[] = {
220
227
 
221
228
  // Note: these are currently hardcoded in scripts/generate_extensions_function.py
222
229
  // TODO: automate by passing though to script via duckdb
223
- static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {{".parquet", "parquet"},
224
- {".json", "json"},
225
- {".jsonl", "json"},
226
- {".ndjson", "json"}}; // END_OF_EXTENSION_FILE_POSTFIXES
230
+ static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {
231
+ {".parquet", "parquet"}, {".json", "json"}, {".jsonl", "json"}, {".ndjson", "json"},
232
+ {".shp", "spatial"}, {".gpkg", "spatial"}, {".fgb", "spatial"}}; // END_OF_EXTENSION_FILE_POSTFIXES
227
233
 
228
234
  // Note: these are currently hardcoded in scripts/generate_extensions_function.py
229
235
  // TODO: automate by passing though to script via duckdb
@@ -10,16 +10,16 @@
10
10
 
11
11
  #include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
12
12
  #include "duckdb/main/relation/table_function_relation.hpp"
13
+ #include "duckdb/common/shared_ptr.hpp"
14
+ #include "duckdb/common/case_insensitive_map.hpp"
13
15
 
14
16
  namespace duckdb {
15
17
 
16
- struct CSVReaderOptions;
17
-
18
18
  class ReadCSVRelation : public TableFunctionRelation {
19
19
  public:
20
20
  ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, vector<ColumnDefinition> columns,
21
21
  string alias = string());
22
- ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, CSVReaderOptions options,
22
+ ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, named_parameter_map_t &&options,
23
23
  string alias = string());
24
24
 
25
25
  string alias;
@@ -35,6 +35,7 @@ public:
35
35
  string ToString(idx_t depth) override;
36
36
  string GetAlias() override;
37
37
  void AddNamedParameter(const string &name, Value argument);
38
+ void SetNamedParameters(named_parameter_map_t &&named_parameters);
38
39
 
39
40
  private:
40
41
  void InitializeColumns();