duckdb 0.8.2-dev4514.0 → 0.8.2-dev4623.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/duckdb.js +11 -1
- package/package.json +3 -1
- package/src/connection.cpp +48 -7
- package/src/duckdb/src/catalog/catalog.cpp +5 -0
- package/src/duckdb/src/catalog/duck_catalog.cpp +4 -0
- package/src/duckdb/src/common/enum_util.cpp +24 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +213 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +59 -38
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/table/arrow.cpp +18 -13
- package/src/duckdb/src/function/table/read_csv.cpp +3 -130
- package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp +83 -0
- package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +5 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +36 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +24 -0
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +36 -4
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +10 -4
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +27 -4
- package/src/duckdb/src/include/duckdb/storage/checkpoint/write_overflow_strings_to_disk.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +22 -1
- package/src/duckdb/src/include/duckdb/storage/database_size.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +6 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
- package/src/duckdb/src/main/connection.cpp +4 -6
- package/src/duckdb/src/main/extension/extension_install.cpp +2 -1
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +28 -9
- package/src/duckdb/src/main/relation/table_function_relation.cpp +8 -2
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/row_group_writer.cpp +1 -4
- package/src/duckdb/src/storage/checkpoint/write_overflow_strings_to_disk.cpp +47 -10
- package/src/duckdb/src/storage/checkpoint_manager.cpp +0 -2
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +6 -1
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +62 -12
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/data_pointer.cpp +20 -0
- package/src/duckdb/src/storage/local_storage.cpp +3 -7
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +29 -15
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +4 -0
- package/src/duckdb/src/storage/single_file_block_manager.cpp +15 -9
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/storage_manager.cpp +5 -0
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +3 -0
- package/src/duckdb/src/storage/table/column_data.cpp +17 -14
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +4 -8
- package/src/duckdb/src/storage/table/column_segment.cpp +21 -12
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/src/duckdb/ub_src_storage.cpp +2 -0
- package/src/duckdb_node.hpp +1 -0
- package/test/close_hang.test.ts +39 -0
package/src/duckdb/src/function/table/read_csv.cpp

```diff
@@ -85,25 +85,6 @@ void ReadCSVData::FinalizeRead(ClientContext &context) {
 	}
 }
 
-uint8_t GetCandidateSpecificity(const LogicalType &candidate_type) {
-	//! Const ht with accepted auto_types and their weights in specificity
-	const duckdb::unordered_map<uint8_t, uint8_t> auto_type_candidates_specificity {
-	    {(uint8_t)LogicalTypeId::VARCHAR, 0},  {(uint8_t)LogicalTypeId::TIMESTAMP, 1},
-	    {(uint8_t)LogicalTypeId::DATE, 2},     {(uint8_t)LogicalTypeId::TIME, 3},
-	    {(uint8_t)LogicalTypeId::DOUBLE, 4},   {(uint8_t)LogicalTypeId::FLOAT, 5},
-	    {(uint8_t)LogicalTypeId::BIGINT, 6},   {(uint8_t)LogicalTypeId::INTEGER, 7},
-	    {(uint8_t)LogicalTypeId::SMALLINT, 8}, {(uint8_t)LogicalTypeId::TINYINT, 9},
-	    {(uint8_t)LogicalTypeId::BOOLEAN, 10}, {(uint8_t)LogicalTypeId::SQLNULL, 11}};
-
-	auto id = (uint8_t)candidate_type.id();
-	auto it = auto_type_candidates_specificity.find(id);
-	if (it == auto_type_candidates_specificity.end()) {
-		throw BinderException("Auto Type Candidate of type %s is not accepted as a valid input",
-		                      EnumUtil::ToString(candidate_type.id()));
-	}
-	return it->second;
-}
-
 static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctionBindInput &input,
                                             vector<LogicalType> &return_types, vector<string> &names) {
 
@@ -111,117 +92,9 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 	auto &options = result->options;
 	result->files = MultiFileReader::GetFileList(context, input.inputs[0], "CSV");
 
-
-
-
-			continue;
-		}
-		auto loption = StringUtil::Lower(kv.first);
-		if (loption == "columns") {
-			explicitly_set_columns = true;
-			auto &child_type = kv.second.type();
-			if (child_type.id() != LogicalTypeId::STRUCT) {
-				throw BinderException("read_csv columns requires a struct as input");
-			}
-			auto &struct_children = StructValue::GetChildren(kv.second);
-			D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
-			for (idx_t i = 0; i < struct_children.size(); i++) {
-				auto &name = StructType::GetChildName(child_type, i);
-				auto &val = struct_children[i];
-				names.push_back(name);
-				if (val.type().id() != LogicalTypeId::VARCHAR) {
-					throw BinderException("read_csv requires a type specification as string");
-				}
-				return_types.emplace_back(TransformStringToLogicalType(StringValue::Get(val), context));
-			}
-			if (names.empty()) {
-				throw BinderException("read_csv requires at least a single column as input!");
-			}
-		} else if (loption == "auto_type_candidates") {
-			options.auto_type_candidates.clear();
-			map<uint8_t, LogicalType> candidate_types;
-			// We always have the extremes of Null and Varchar, so we can default to varchar if the
-			// sniffer is not able to confidently detect that column type
-			candidate_types[GetCandidateSpecificity(LogicalType::VARCHAR)] = LogicalType::VARCHAR;
-			candidate_types[GetCandidateSpecificity(LogicalType::SQLNULL)] = LogicalType::SQLNULL;
-
-			auto &child_type = kv.second.type();
-			if (child_type.id() != LogicalTypeId::LIST) {
-				throw BinderException("read_csv auto_types requires a list as input");
-			}
-			auto &list_children = ListValue::GetChildren(kv.second);
-			if (list_children.empty()) {
-				throw BinderException("auto_type_candidates requires at least one type");
-			}
-			for (auto &child : list_children) {
-				if (child.type().id() != LogicalTypeId::VARCHAR) {
-					throw BinderException("auto_type_candidates requires a type specification as string");
-				}
-				auto candidate_type = TransformStringToLogicalType(StringValue::Get(child), context);
-				candidate_types[GetCandidateSpecificity(candidate_type)] = candidate_type;
-			}
-			for (auto &candidate_type : candidate_types) {
-				options.auto_type_candidates.emplace_back(candidate_type.second);
-			}
-		} else if (loption == "column_names" || loption == "names") {
-			if (!options.name_list.empty()) {
-				throw BinderException("read_csv_auto column_names/names can only be supplied once");
-			}
-			if (kv.second.IsNull()) {
-				throw BinderException("read_csv_auto %s cannot be NULL", kv.first);
-			}
-			auto &children = ListValue::GetChildren(kv.second);
-			for (auto &child : children) {
-				options.name_list.push_back(StringValue::Get(child));
-			}
-		} else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
-			auto &child_type = kv.second.type();
-			if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
-				throw BinderException("read_csv_auto %s requires a struct or list as input", kv.first);
-			}
-			if (!options.sql_type_list.empty()) {
-				throw BinderException("read_csv_auto column_types/types/dtypes can only be supplied once");
-			}
-			vector<string> sql_type_names;
-			if (child_type.id() == LogicalTypeId::STRUCT) {
-				auto &struct_children = StructValue::GetChildren(kv.second);
-				D_ASSERT(StructType::GetChildCount(child_type) == struct_children.size());
-				for (idx_t i = 0; i < struct_children.size(); i++) {
-					auto &name = StructType::GetChildName(child_type, i);
-					auto &val = struct_children[i];
-					if (val.type().id() != LogicalTypeId::VARCHAR) {
-						throw BinderException("read_csv_auto %s requires a type specification as string", kv.first);
-					}
-					sql_type_names.push_back(StringValue::Get(val));
-					options.sql_types_per_column[name] = i;
-				}
-			} else {
-				auto &list_child = ListType::GetChildType(child_type);
-				if (list_child.id() != LogicalTypeId::VARCHAR) {
-					throw BinderException("read_csv_auto %s requires a list of types (varchar) as input", kv.first);
-				}
-				auto &children = ListValue::GetChildren(kv.second);
-				for (auto &child : children) {
-					sql_type_names.push_back(StringValue::Get(child));
-				}
-			}
-			options.sql_type_list.reserve(sql_type_names.size());
-			for (auto &sql_type : sql_type_names) {
-				auto def_type = TransformStringToLogicalType(sql_type);
-				if (def_type.id() == LogicalTypeId::USER) {
-					throw BinderException("Unrecognized type \"%s\" for read_csv_auto %s definition", sql_type,
-					                      kv.first);
-				}
-				options.sql_type_list.push_back(std::move(def_type));
-			}
-		} else if (loption == "all_varchar") {
-			options.all_varchar = BooleanValue::Get(kv.second);
-		} else if (loption == "normalize_names") {
-			options.normalize_names = BooleanValue::Get(kv.second);
-		} else {
-			options.SetReadOption(loption, kv.second, names);
-		}
-	}
+	options.FromNamedParameters(input.named_parameters, context, return_types, names);
+	bool explicitly_set_columns = options.explicitly_set_columns;
+
 	options.file_options.AutoDetectHivePartitioning(result->files, context);
 
 	if (!options.auto_detect && return_types.empty()) {
```
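
The hand-rolled option loop removed above is replaced by a single call to the new `CSVReaderOptions::FromNamedParameters` helper (declared in csv_reader_options.hpp further down in this diff). A minimal sketch of the same named parameters exercised through the C++ API; the file name and option values are hypothetical:

```cpp
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	// Named parameters that the deleted loop used to parse field by field; they are now
	// routed through CSVReaderOptions::FromNamedParameters inside ReadCSVBind.
	con.Query("SELECT * FROM read_csv('data.csv', "
	          "columns = {'id': 'INTEGER', 'name': 'VARCHAR'}, delim = '|', header = true)")
	    ->Print();
	// Alternatively, let the sniffer pick from an explicit candidate set.
	con.Query("SELECT * FROM read_csv_auto('data.csv', "
	          "auto_type_candidates = ['BIGINT', 'DOUBLE', 'VARCHAR'])")
	    ->Print();
	return 0;
}
```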

package/src/duckdb/src/function/table/system/pragma_metadata_info.cpp

```diff
@@ -0,0 +1,83 @@
+#include "duckdb/function/table/system_functions.hpp"
+
+#include "duckdb/catalog/catalog.hpp"
+#include "duckdb/storage/database_size.hpp"
+#include "duckdb/main/database_manager.hpp"
+#include "duckdb/function/function_set.hpp"
+namespace duckdb {
+
+struct PragmaMetadataFunctionData : public TableFunctionData {
+	explicit PragmaMetadataFunctionData() {
+	}
+
+	vector<MetadataBlockInfo> metadata_info;
+};
+
+struct PragmaMetadataOperatorData : public GlobalTableFunctionState {
+	PragmaMetadataOperatorData() : offset(0) {
+	}
+
+	idx_t offset;
+};
+
+static unique_ptr<FunctionData> PragmaMetadataInfoBind(ClientContext &context, TableFunctionBindInput &input,
+                                                       vector<LogicalType> &return_types, vector<string> &names) {
+	names.emplace_back("block_id");
+	return_types.emplace_back(LogicalType::BIGINT);
+
+	names.emplace_back("total_blocks");
+	return_types.emplace_back(LogicalType::BIGINT);
+
+	names.emplace_back("free_blocks");
+	return_types.emplace_back(LogicalType::BIGINT);
+
+	names.emplace_back("free_list");
+	return_types.emplace_back(LogicalType::LIST(LogicalType::BIGINT));
+
+	string db_name =
+	    input.inputs.empty() ? DatabaseManager::GetDefaultDatabase(context) : StringValue::Get(input.inputs[0]);
+	auto &catalog = Catalog::GetCatalog(context, db_name);
+	auto result = make_uniq<PragmaMetadataFunctionData>();
+	result->metadata_info = catalog.GetMetadataInfo(context);
+	return std::move(result);
+}
+
+unique_ptr<GlobalTableFunctionState> PragmaMetadataInfoInit(ClientContext &context, TableFunctionInitInput &input) {
+	return make_uniq<PragmaMetadataOperatorData>();
+}
+
+static void PragmaMetadataInfoFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
+	auto &bind_data = data_p.bind_data->Cast<PragmaMetadataFunctionData>();
+	auto &data = data_p.global_state->Cast<PragmaMetadataOperatorData>();
+	idx_t count = 0;
+	while (data.offset < bind_data.metadata_info.size() && count < STANDARD_VECTOR_SIZE) {
+		auto &entry = bind_data.metadata_info[data.offset++];
+
+		idx_t col_idx = 0;
+		// block_id
+		output.SetValue(col_idx++, count, Value::BIGINT(entry.block_id));
+		// total_blocks
+		output.SetValue(col_idx++, count, Value::BIGINT(entry.total_blocks));
+		// free_blocks
+		output.SetValue(col_idx++, count, Value::BIGINT(entry.free_list.size()));
+		// free_list
+		vector<Value> list_values;
+		for (auto &free_id : entry.free_list) {
+			list_values.push_back(Value::BIGINT(free_id));
+		}
+		output.SetValue(col_idx++, count, Value::LIST(LogicalType::BIGINT, std::move(list_values)));
+		count++;
+	}
+	output.SetCardinality(count);
+}
+
+void PragmaMetadataInfo::RegisterFunction(BuiltinFunctions &set) {
+	TableFunctionSet metadata_info("pragma_metadata_info");
+	metadata_info.AddFunction(
+	    TableFunction({}, PragmaMetadataInfoFunction, PragmaMetadataInfoBind, PragmaMetadataInfoInit));
+	metadata_info.AddFunction(TableFunction({LogicalType::VARCHAR}, PragmaMetadataInfoFunction, PragmaMetadataInfoBind,
+	                                        PragmaMetadataInfoInit));
+	set.AddFunction(metadata_info);
+}
+
+} // namespace duckdb
```
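
This new file registers a `pragma_metadata_info` table function in both a zero-argument form and a single-VARCHAR-argument form. A hedged usage sketch through the C++ API; the database and attachment names are hypothetical:

```cpp
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db("example.db"); // hypothetical on-disk database
	duckdb::Connection con(db);
	// Zero-argument form: metadata blocks of the default database,
	// with the columns declared in PragmaMetadataInfoBind above.
	con.Query("SELECT block_id, total_blocks, free_blocks FROM pragma_metadata_info()")->Print();
	// One-argument (VARCHAR) form: name a specific attached database.
	con.Query("SELECT * FROM pragma_metadata_info('example')")->Print();
	return 0;
}
```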

package/src/duckdb/src/function/table/system/pragma_storage_info.cpp

```diff
@@ -76,6 +76,9 @@ static unique_ptr<FunctionData> PragmaStorageInfoBind(ClientContext &context, Ta
 	names.emplace_back("block_offset");
 	return_types.emplace_back(LogicalType::BIGINT);
 
+	names.emplace_back("segment_info");
+	return_types.emplace_back(LogicalType::VARCHAR);
+
 	auto qname = QualifiedName::Parse(input.inputs[0].GetValue<string>());
 
 	// look up the table name in the catalog
@@ -133,6 +136,8 @@ static void PragmaStorageInfoFunction(ClientContext &context, TableFunctionInput
 			output.SetValue(col_idx++, count, Value());
 			output.SetValue(col_idx++, count, Value());
 		}
+		// segment_info
+		output.SetValue(col_idx++, count, Value(entry.segment_info));
 		count++;
 	}
 	output.SetCardinality(count);
```

package/src/duckdb/src/function/table/system_functions.cpp

```diff
@@ -14,6 +14,7 @@ void BuiltinFunctions::RegisterSQLiteFunctions() {
 	PragmaCollations::RegisterFunction(*this);
 	PragmaTableInfo::RegisterFunction(*this);
 	PragmaStorageInfo::RegisterFunction(*this);
+	PragmaMetadataInfo::RegisterFunction(*this);
 	PragmaDatabaseSize::RegisterFunction(*this);
 	PragmaLastProfilingOutput::RegisterFunction(*this);
 	PragmaDetailedProfilingOutput::RegisterFunction(*this);
```

package/src/duckdb/src/function/table/version/pragma_version.cpp

```diff
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.8.2-dev4514"
+#define DUCKDB_VERSION "0.8.2-dev4623"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "52a47a6b31"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
```

package/src/duckdb/src/include/duckdb/catalog/catalog.hpp

```diff
@@ -34,6 +34,7 @@ struct CreateIndexInfo;
 struct CreateTypeInfo;
 struct CreateTableInfo;
 struct DatabaseSize;
+struct MetadataBlockInfo;
 
 class AttachedDatabase;
 class ClientContext;
@@ -266,6 +267,7 @@ public:
 	                                                   unique_ptr<LogicalOperator> plan) = 0;
 
 	virtual DatabaseSize GetDatabaseSize(ClientContext &context) = 0;
+	virtual vector<MetadataBlockInfo> GetMetadataInfo(ClientContext &context);
 
 	virtual bool InMemory() = 0;
 	virtual string GetDBPath() = 0;
```

package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp

```diff
@@ -54,6 +54,7 @@ public:
 	                                                   unique_ptr<LogicalOperator> plan) override;
 
 	DatabaseSize GetDatabaseSize(ClientContext &context) override;
+	vector<MetadataBlockInfo> GetMetadataInfo(ClientContext &context) override;
 
 	DUCKDB_API bool InMemory() override;
 	DUCKDB_API string GetDBPath() override;
```

package/src/duckdb/src/include/duckdb/common/box_renderer.hpp

```diff
@@ -18,7 +18,7 @@ class ColumnDataCollection;
 class ColumnDataRowCollection;
 
 enum class ValueRenderAlignment { LEFT, MIDDLE, RIGHT };
-enum class RenderMode { ROWS, COLUMNS };
+enum class RenderMode : uint8_t { ROWS, COLUMNS };
 
 struct BoxRendererConfig {
 	// a max_width of 0 means we default to the terminal width
```

package/src/duckdb/src/include/duckdb/common/enum_util.hpp

```diff
@@ -216,6 +216,8 @@ enum class QuoteRule : uint8_t;
 
 enum class RelationType : uint8_t;
 
+enum class RenderMode : uint8_t;
+
 enum class ResultModifierType : uint8_t;
 
 enum class SampleMethod : uint8_t;
@@ -565,6 +567,9 @@ const char* EnumUtil::ToChars<QuoteRule>(QuoteRule value);
 template<>
 const char* EnumUtil::ToChars<RelationType>(RelationType value);
 
+template<>
+const char* EnumUtil::ToChars<RenderMode>(RenderMode value);
+
 template<>
 const char* EnumUtil::ToChars<ResultModifierType>(ResultModifierType value);
 
@@ -950,6 +955,9 @@ QuoteRule EnumUtil::FromString<QuoteRule>(const char *value);
 template<>
 RelationType EnumUtil::FromString<RelationType>(const char *value);
 
+template<>
+RenderMode EnumUtil::FromString<RenderMode>(const char *value);
+
 template<>
 ResultModifierType EnumUtil::FromString<ResultModifierType>(const char *value);
 
```

package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp

```diff
@@ -15,10 +15,12 @@
 namespace duckdb {
 class ClientContext;
 class Catalog;
+class DatabaseInstance;
 enum class ExpressionType : uint8_t;
 
 struct DeserializationData {
 	stack<reference<ClientContext>> contexts;
+	stack<reference<DatabaseInstance>> databases;
 	stack<idx_t> enums;
 	stack<reference<bound_parameter_map_t>> parameter_data;
 	stack<reference<LogicalType>> types;
@@ -74,6 +76,23 @@ inline void DeserializationData::Unset<LogicalOperatorType>() {
 	enums.pop();
 }
 
+template <>
+inline void DeserializationData::Set(CompressionType type) {
+	enums.push(idx_t(type));
+}
+
+template <>
+inline CompressionType DeserializationData::Get() {
+	AssertNotEmpty(enums);
+	return CompressionType(enums.top());
+}
+
+template <>
+inline void DeserializationData::Unset<CompressionType>() {
+	AssertNotEmpty(enums);
+	enums.pop();
+}
+
 template <>
 inline void DeserializationData::Set(CatalogType type) {
 	enums.push(idx_t(type));
@@ -108,6 +127,23 @@ inline void DeserializationData::Unset<ClientContext>() {
 	contexts.pop();
 }
 
+template <>
+inline void DeserializationData::Set(DatabaseInstance &db) {
+	databases.push(db);
+}
+
+template <>
+inline DatabaseInstance &DeserializationData::Get() {
+	AssertNotEmpty(databases);
+	return databases.top();
+}
+
+template <>
+inline void DeserializationData::Unset<DatabaseInstance>() {
+	AssertNotEmpty(databases);
+	databases.pop();
+}
+
 template <>
 inline void DeserializationData::Set(bound_parameter_map_t &context) {
 	parameter_data.push(context);
```
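
The new `CompressionType` and `DatabaseInstance` specializations reuse the existing push/top/pop discipline of `DeserializationData`. A stand-alone sketch of that pattern, not DuckDB code, just to illustrate why every `Set` needs a matching `Unset` once the nested object has been deserialized:

```cpp
#include <cassert>
#include <stack>

// Illustrative only: mirrors the stack discipline of DeserializationData.
struct ScopedEnumStack {
	std::stack<int> values;

	void Set(int value) {
		values.push(value); // push scoped state before deserializing nested objects
	}
	int Get() const {
		assert(!values.empty());
		return values.top(); // nested deserializers read the innermost value
	}
	void Unset() {
		assert(!values.empty());
		values.pop(); // pop once the nested object is done
	}
};

int main() {
	ScopedEnumStack data;
	data.Set(1); // e.g. outer segment's compression type
	{
		data.Set(2); // nested child temporarily overrides it
		assert(data.Get() == 2);
		data.Unset();
	}
	assert(data.Get() == 1);
	data.Unset();
	return 0;
}
```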

package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp

```diff
@@ -159,18 +159,33 @@ struct CSVReaderOptions {
 	string suffix;
 	string write_newline;
 
+	//! The date format to use (if any is specified)
+	map<LogicalTypeId, StrpTimeFormat> date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
 	//! The date format to use for writing (if any is specified)
 	map<LogicalTypeId, StrfTimeFormat> write_date_format = {{LogicalTypeId::DATE, {}}, {LogicalTypeId::TIMESTAMP, {}}};
+	//! Whether or not a type format is specified
+	map<LogicalTypeId, bool> has_format = {{LogicalTypeId::DATE, false}, {LogicalTypeId::TIMESTAMP, false}};
 
 	void Serialize(Serializer &serializer) const;
 	static CSVReaderOptions Deserialize(Deserializer &deserializer);
 
 	void SetCompression(const string &compression);
+
+	bool GetHeader() const;
 	void SetHeader(bool has_header);
+
+	string GetEscape() const;
 	void SetEscape(const string &escape);
+
+	int64_t GetSkipRows() const;
+	void SetSkipRows(int64_t rows);
+
+	string GetQuote() const;
 	void SetQuote(const string &quote);
 	void SetDelimiter(const string &delimiter);
+	string GetDelimiter() const;
 
+	NewLineIdentifier GetNewline() const;
 	void SetNewline(const string &input);
 	//! Set an option that is supported by both reading and writing functions, called by
 	//! the SetReadOption and SetWriteOption methods
@@ -182,7 +197,16 @@ struct CSVReaderOptions {
 	void SetReadOption(const string &loption, const Value &value, vector<string> &expected_names);
 	void SetWriteOption(const string &loption, const Value &value);
 	void SetDateFormat(LogicalTypeId type, const string &format, bool read_format);
+	void ToNamedParameters(named_parameter_map_t &out);
+	void FromNamedParameters(named_parameter_map_t &in, ClientContext &context, vector<LogicalType> &return_types,
+	                         vector<string> &names);
 
 	string ToString() const;
+
+	named_parameter_map_t OutputReadSettings();
+
+public:
+	//! Whether columns were explicitly provided through named parameters
+	bool explicitly_set_columns = false;
 };
 } // namespace duckdb
```

package/src/duckdb/src/include/duckdb/function/compression_function.hpp

```diff
@@ -14,6 +14,7 @@
 #include "duckdb/common/map.hpp"
 #include "duckdb/storage/storage_info.hpp"
 #include "duckdb/common/mutex.hpp"
+#include "duckdb/storage/data_pointer.hpp"
 
 namespace duckdb {
 class DatabaseInstance;
@@ -21,6 +22,7 @@ class ColumnData;
 class ColumnDataCheckpointer;
 class ColumnSegment;
 class SegmentStatistics;
+struct ColumnSegmentState;
 
 struct ColumnFetchState;
 struct ColumnScanState;
@@ -62,6 +64,11 @@ struct CompressedSegmentState {
 	virtual ~CompressedSegmentState() {
 	}
 
+	//! Display info for PRAGMA storage_info
+	virtual string GetSegmentInfo() const { // LCOV_EXCL_START
+		return "";
+	} // LCOV_EXCL_STOP
+
 	template <class TARGET>
 	TARGET &Cast() {
 		D_ASSERT(dynamic_cast<TARGET *>(this));
@@ -75,7 +82,7 @@ struct CompressedSegmentState {
 };
 
 struct CompressionAppendState {
-	CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) {
+	explicit CompressionAppendState(BufferHandle handle_p) : handle(std::move(handle_p)) {
 	}
 	virtual ~CompressionAppendState() {
 	}
@@ -139,13 +146,24 @@ typedef void (*compression_skip_t)(ColumnSegment &segment, ColumnScanState &stat
 //===--------------------------------------------------------------------===//
 // Append (optional)
 //===--------------------------------------------------------------------===//
-typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(ColumnSegment &segment, block_id_t block_id);
+typedef unique_ptr<CompressedSegmentState> (*compression_init_segment_t)(
+    ColumnSegment &segment, block_id_t block_id, optional_ptr<ColumnSegmentState> segment_state);
 typedef unique_ptr<CompressionAppendState> (*compression_init_append_t)(ColumnSegment &segment);
 typedef idx_t (*compression_append_t)(CompressionAppendState &append_state, ColumnSegment &segment,
                                       SegmentStatistics &stats, UnifiedVectorFormat &data, idx_t offset, idx_t count);
 typedef idx_t (*compression_finalize_append_t)(ColumnSegment &segment, SegmentStatistics &stats);
 typedef void (*compression_revert_append_t)(ColumnSegment &segment, idx_t start_row);
 
+//===--------------------------------------------------------------------===//
+// Serialization (optional)
+//===--------------------------------------------------------------------===//
+//! Function prototype for serializing the segment state
+typedef unique_ptr<ColumnSegmentState> (*compression_serialize_state_t)(ColumnSegment &segment);
+//! Function prototype for deserializing the segment state
+typedef unique_ptr<ColumnSegmentState> (*compression_deserialize_state_t)(Deserializer &deserializer);
+//! Function prototype for cleaning up the segment state when the column data is dropped
+typedef void (*compression_cleanup_state_t)(ColumnSegment &segment);
+
 class CompressionFunction {
 public:
 	CompressionFunction(CompressionType type, PhysicalType data_type, compression_init_analyze_t init_analyze,
@@ -157,12 +175,16 @@ public:
 	                    compression_init_segment_t init_segment = nullptr,
 	                    compression_init_append_t init_append = nullptr, compression_append_t append = nullptr,
 	                    compression_finalize_append_t finalize_append = nullptr,
-	                    compression_revert_append_t revert_append = nullptr)
+	                    compression_revert_append_t revert_append = nullptr,
+	                    compression_serialize_state_t serialize_state = nullptr,
+	                    compression_deserialize_state_t deserialize_state = nullptr,
+	                    compression_cleanup_state_t cleanup_state = nullptr)
 	    : type(type), data_type(data_type), init_analyze(init_analyze), analyze(analyze), final_analyze(final_analyze),
 	      init_compression(init_compression), compress(compress), compress_finalize(compress_finalize),
 	      init_scan(init_scan), scan_vector(scan_vector), scan_partial(scan_partial), fetch_row(fetch_row), skip(skip),
 	      init_segment(init_segment), init_append(init_append), append(append), finalize_append(finalize_append),
-	      revert_append(revert_append) {
+	      revert_append(revert_append), serialize_state(serialize_state), deserialize_state(deserialize_state),
+	      cleanup_state(cleanup_state) {
 	}
 
 	//! Compression type
@@ -218,6 +240,16 @@ public:
 	compression_finalize_append_t finalize_append;
 	//! Revert append (optional)
 	compression_revert_append_t revert_append;
+
+	// State serialize functions
+	//! This is only necessary if the segment state has information that must be written to disk in the metadata
+
+	//! Serialize the segment state to the metadata (optional)
+	compression_serialize_state_t serialize_state;
+	//! Deserialize the segment state to the metadata (optional)
+	compression_deserialize_state_t deserialize_state;
+	//! Cleanup the segment state (optional)
+	compression_cleanup_state_t cleanup_state;
 };
 
 //! The set of compression functions
```

package/src/duckdb/src/include/duckdb/function/table/arrow.hpp

```diff
@@ -129,6 +129,8 @@ public:
 
 	//! Scan Function
 	static void ArrowScanFunction(ClientContext &context, TableFunctionInput &data, DataChunk &output);
+	static void PopulateArrowTableType(ArrowTableType &arrow_table, ArrowSchemaWrapper &schema_p, vector<string> &names,
+	                                   vector<LogicalType> &return_types);
 
 protected:
 	//! Defines Maximum Number of Threads
```

package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp

```diff
@@ -25,6 +25,10 @@ struct PragmaStorageInfo {
 	static void RegisterFunction(BuiltinFunctions &set);
 };
 
+struct PragmaMetadataInfo {
+	static void RegisterFunction(BuiltinFunctions &set);
+};
+
 struct PragmaLastProfilingOutput {
 	static void RegisterFunction(BuiltinFunctions &set);
 };
```

package/src/duckdb/src/include/duckdb/main/connection.hpp

```diff
@@ -131,7 +131,7 @@ public:
 
 	//! Reads CSV file
 	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file);
-	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file,
+	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, named_parameter_map_t &&options);
 	DUCKDB_API shared_ptr<Relation> ReadCSV(const string &csv_file, const vector<string> &columns);
 
 	//! Reads Parquet file
```
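
The options-taking `ReadCSV` overload now accepts a `named_parameter_map_t`, mirroring the named parameters of `read_csv`. A hedged sketch of calling it; the file name and option values are hypothetical:

```cpp
#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	// Build the same named parameters read_csv() would accept and hand them to the
	// new overload, which forwards them to ReadCSVRelation / CSVReaderOptions.
	duckdb::named_parameter_map_t options;
	options["header"] = duckdb::Value::BOOLEAN(true);
	options["delim"] = duckdb::Value("|");
	auto rel = con.ReadCSV("data.csv", std::move(options));
	rel->Print();
	return 0;
}
```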

package/src/duckdb/src/include/duckdb/main/extension_entries.hpp

```diff
@@ -118,6 +118,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
     {"st_dwithin_spheroid", "spatial"},
     {"st_envelope", "spatial"},
     {"st_equals", "spatial"},
+    {"st_extent", "spatial"},
     {"st_flipcoordinates", "spatial"},
     {"st_geometrytype", "spatial"},
     {"st_geomfromgeojson", "spatial"},
@@ -126,6 +127,7 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
     {"st_geomfromtext", "spatial"},
     {"st_geomfromwkb", "spatial"},
     {"st_intersection", "spatial"},
+    {"st_intersection_agg", "spatial"},
     {"st_intersects", "spatial"},
     {"st_isclosed", "spatial"},
     {"st_isempty", "spatial"},
@@ -159,9 +161,14 @@ static constexpr ExtensionEntry EXTENSION_FUNCTIONS[] = {
     {"st_touches", "spatial"},
     {"st_transform", "spatial"},
     {"st_union", "spatial"},
+    {"st_union_agg", "spatial"},
     {"st_within", "spatial"},
     {"st_x", "spatial"},
+    {"st_xmax", "spatial"},
+    {"st_xmin", "spatial"},
     {"st_y", "spatial"},
+    {"st_ymax", "spatial"},
+    {"st_ymin", "spatial"},
     {"stem", "fts"},
     {"text", "excel"},
     {"to_arrow_ipc", "arrow"},
@@ -220,10 +227,9 @@ static constexpr ExtensionEntry EXTENSION_FILE_PREFIXES[] = {
 
 // Note: these are currently hardcoded in scripts/generate_extensions_function.py
 // TODO: automate by passing though to script via duckdb
-static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {
-
-
-    {".ndjson", "json"}}; // END_OF_EXTENSION_FILE_POSTFIXES
+static constexpr ExtensionEntry EXTENSION_FILE_POSTFIXES[] = {
+    {".parquet", "parquet"}, {".json", "json"}, {".jsonl", "json"}, {".ndjson", "json"},
+    {".shp", "spatial"}, {".gpkg", "spatial"}, {".fgb", "spatial"}}; // END_OF_EXTENSION_FILE_POSTFIXES
 
 // Note: these are currently hardcoded in scripts/generate_extensions_function.py
 // TODO: automate by passing though to script via duckdb
```

package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp

```diff
@@ -10,16 +10,16 @@
 
 #include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
 #include "duckdb/main/relation/table_function_relation.hpp"
+#include "duckdb/common/shared_ptr.hpp"
+#include "duckdb/common/case_insensitive_map.hpp"
 
 namespace duckdb {
 
-struct CSVReaderOptions;
-
 class ReadCSVRelation : public TableFunctionRelation {
 public:
 	ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, vector<ColumnDefinition> columns,
 	                string alias = string());
-	ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file,
+	ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, named_parameter_map_t &&options,
 	                string alias = string());
 
 	string alias;
```