duckdb 0.7.1-dev107.0 → 0.7.1-dev137.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +29 -5
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +5 -1
- package/src/duckdb/extension/json/include/json_scan.hpp +17 -2
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +19 -0
- package/src/duckdb/extension/json/json_functions/read_json.cpp +30 -28
- package/src/duckdb/extension/json/json_functions.cpp +6 -0
- package/src/duckdb/extension/json/json_scan.cpp +109 -22
- package/src/duckdb/src/common/types.cpp +36 -10
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +3 -0
- package/src/duckdb/src/function/table/read_csv.cpp +13 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -2
- package/src/duckdb/src/parser/statement/copy_statement.cpp +2 -13
- package/src/duckdb/src/parser/transform/expression/transform_case.cpp +3 -3
package/package.json
CHANGED
package/src/duckdb/extension/json/buffered_json_reader.cpp
CHANGED
@@ -25,7 +25,12 @@ JSONBufferHandle::JSONBufferHandle(idx_t buffer_index_p, idx_t readers_p, Alloca
 JSONFileHandle::JSONFileHandle(unique_ptr<FileHandle> file_handle_p, Allocator &allocator_p)
     : file_handle(std::move(file_handle_p)), allocator(allocator_p), can_seek(file_handle->CanSeek()),
       plain_file_source(file_handle->OnDiskFile() && can_seek), file_size(file_handle->GetFileSize()), read_position(0),
-      cached_size(0) {
+      requested_reads(0), actual_reads(0), cached_size(0) {
+}
+
+void JSONFileHandle::Close() {
+	file_handle->Close();
+	cached_buffers.clear();
 }
 
 idx_t JSONFileHandle::FileSize() const {
@@ -36,10 +41,6 @@ idx_t JSONFileHandle::Remaining() const {
 	return file_size - read_position;
 }
 
-bool JSONFileHandle::PlainFileSource() const {
-	return plain_file_source;
-}
-
 bool JSONFileHandle::CanSeek() const {
 	return can_seek;
 }
@@ -53,6 +54,9 @@ idx_t JSONFileHandle::GetPositionAndSize(idx_t &position, idx_t requested_size)
 	position = read_position;
 	auto actual_size = MinValue<idx_t>(requested_size, Remaining());
 	read_position += actual_size;
+	if (actual_size != 0) {
+		requested_reads++;
+	}
 	return actual_size;
 }
 
@@ -60,11 +64,13 @@ void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t posit
 	D_ASSERT(size != 0);
 	if (plain_file_source) {
 		file_handle->Read((void *)pointer, size, position);
+		actual_reads++;
 		return;
 	}
 
 	if (sample_run) { // Cache the buffer
 		file_handle->Read((void *)pointer, size, position);
+		actual_reads++;
 		cached_buffers.emplace_back(allocator.Allocate(size));
 		memcpy(cached_buffers.back().get(), pointer, size);
 		cached_size += size;
@@ -73,9 +79,11 @@ void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t posit
 
 	if (!cached_buffers.empty() || position < cached_size) {
 		ReadFromCache(pointer, size, position);
+		actual_reads++;
 	}
 	if (size != 0) {
 		file_handle->Read((void *)pointer, size, position);
+		actual_reads++;
 	}
 }
 
@@ -143,6 +151,16 @@ void BufferedJSONReader::OpenJSONFile() {
 	file_handle = make_unique<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
 }
 
+void BufferedJSONReader::CloseJSONFile() {
+	while (true) {
+		lock_guard<mutex> guard(lock);
+		if (file_handle->RequestedReadsComplete()) {
+			file_handle->Close();
+			break;
+		}
+	}
+}
+
 bool BufferedJSONReader::IsOpen() {
 	return file_handle != nullptr;
 }
@@ -246,9 +264,15 @@ void BufferedJSONReader::Reset() {
 
 void JSONFileHandle::Reset() {
 	read_position = 0;
+	requested_reads = 0;
+	actual_reads = 0;
 	if (plain_file_source) {
 		file_handle->Reset();
 	}
}
 
+bool JSONFileHandle::RequestedReadsComplete() {
+	return requested_reads == actual_reads;
+}
+
 } // namespace duckdb
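The core of this change is a pair of counters: `requested_reads` is bumped under the reader's lock whenever `GetPositionAndSize` hands out a byte range, and the atomic `actual_reads` is bumped when `ReadAtPosition` finishes the corresponding I/O, so `CloseJSONFile` can spin until nothing is in flight before closing the handle. A minimal standalone sketch of that pattern (hypothetical names, not the DuckDB API):

```cpp
#include <atomic>
#include <cstdint>
#include <mutex>

// Sketch: hand out read ranges under a lock, complete them lock-free, and
// only close once every handed-out read has finished.
struct CountedReadHandle {
	std::mutex lock;
	uint64_t requested_reads = 0;          // bumped when a range is handed out
	std::atomic<uint64_t> actual_reads{0}; // bumped when the I/O completes
	bool closed = false;

	void AcquireRange() { // caller performs the actual read afterwards
		std::lock_guard<std::mutex> guard(lock);
		requested_reads++;
	}
	void CompleteRange() { // safe without the lock: the counter is atomic
		actual_reads++;
	}
	void Close() { // busy-wait, like the diff's while(true) in CloseJSONFile
		while (true) {
			std::lock_guard<std::mutex> guard(lock);
			if (actual_reads == requested_reads) {
				closed = true;
				break;
			}
		}
	}
};
```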
package/src/duckdb/extension/json/include/buffered_json_reader.hpp
CHANGED
@@ -58,11 +58,11 @@ public:
 struct JSONFileHandle {
 public:
 	JSONFileHandle(unique_ptr<FileHandle> file_handle, Allocator &allocator);
+	void Close();
 
 	idx_t FileSize() const;
 	idx_t Remaining() const;
 
-	bool PlainFileSource() const;
 	bool CanSeek() const;
 	void Seek(idx_t position);
 
@@ -71,6 +71,7 @@ public:
 	idx_t Read(const char *pointer, idx_t requested_size, bool sample_run);
 
 	void Reset();
+	bool RequestedReadsComplete();
 
 private:
 	idx_t ReadFromCache(const char *&pointer, idx_t &size, idx_t &position);
@@ -87,6 +88,8 @@ private:
 
 	//! Read properties
 	idx_t read_position;
+	idx_t requested_reads;
+	atomic<idx_t> actual_reads;
 
 	//! Cached buffers for resetting when reading stream
 	vector<AllocatedData> cached_buffers;
@@ -98,6 +101,7 @@ public:
 	BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, string file_path);
 
 	void OpenJSONFile();
+	void CloseJSONFile();
 	bool IsOpen();
 
 	BufferedJSONReaderOptions &GetOptions();
package/src/duckdb/extension/json/include/json_scan.hpp
CHANGED
@@ -26,6 +26,16 @@ enum class JSONScanType : uint8_t {
 	SAMPLE = 3,
 };
 
+enum class JSONScanTopLevelType : uint8_t {
+	INVALID = 0,
+	//! Sequential objects, e.g., NDJSON
+	OBJECTS = 1,
+	//! Top-level array containing objects
+	ARRAY_OF_OBJECTS = 2,
+	//! Other, e.g., array of integer, or just strings
+	OTHER = 3
+};
+
 //! Even though LogicalTypeId is just a uint8_t, this is still needed ...
 struct LogicalTypeIdHash {
 	inline std::size_t operator()(const LogicalTypeId &id) const {
@@ -105,7 +115,7 @@ public:
 	//! Max depth we go to detect nested JSON schema (defaults to unlimited)
 	idx_t max_depth = NumericLimits<idx_t>::Maximum();
 	//! Whether we're parsing objects (usually), or something else like arrays
-
+	JSONScanTopLevelType top_level_type = JSONScanTopLevelType::OBJECTS;
 	//! Forced date/timestamp formats
 	string date_format;
 	string timestamp_format;
@@ -181,9 +191,14 @@ public:
 	yyjson_alc *GetAllocator();
 	void ThrowTransformError(idx_t count, idx_t object_index, const string &error_message);
 
+	idx_t scan_count;
 	JSONLine lines[STANDARD_VECTOR_SIZE];
 	yyjson_val *objects[STANDARD_VECTOR_SIZE];
 
+	idx_t array_idx;
+	idx_t array_offset;
+	yyjson_val *array_objects[STANDARD_VECTOR_SIZE];
+
 	idx_t batch_index;
 
 	//! Options when transforming the JSON to columnar data
@@ -192,6 +207,7 @@ public:
 
 private:
 	yyjson_val *ParseLine(char *line_start, idx_t line_size, idx_t remaining, JSONLine &line);
+	idx_t GetObjectsFromArray();
 
 private:
 	//! Bind data
@@ -300,7 +316,6 @@ public:
 	table_function.serialize = JSONScanSerialize;
 	table_function.deserialize = JSONScanDeserialize;
 
-	// TODO: might be able to do some of these
 	table_function.projection_pushdown = false;
 	table_function.filter_pushdown = false;
 	table_function.filter_prune = false;
package/src/duckdb/extension/json/json_functions/json_transform.cpp
CHANGED
@@ -523,6 +523,21 @@ static bool TransformArray(yyjson_val *arrays[], yyjson_alc *alc, Vector &result
 	return success;
 }
 
+bool TransformToJSON(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count) {
+	auto data = (string_t *)FlatVector::GetData(result);
+	auto &validity = FlatVector::Validity(result);
+	for (idx_t i = 0; i < count; i++) {
+		const auto &val = vals[i];
+		if (!val) {
+			validity.SetInvalid(i);
+		} else {
+			data[i] = JSONCommon::WriteVal(val, alc);
+		}
+	}
+	// Can always transform to JSON
+	return true;
+}
+
 bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
                               JSONTransformOptions &options) {
 	auto result_type = result.GetType();
@@ -531,6 +546,10 @@ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &resul
 		return TransformFromStringWithFormat(vals, result, count, options);
 	}
 
+	if (JSONCommon::LogicalTypeIsJSON(result_type)) {
+		return TransformToJSON(vals, alc, result, count);
+	}
+
 	switch (result_type.id()) {
 	case LogicalTypeId::SQLNULL:
 		return true;
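The new `TransformToJSON` path serializes each parsed value back to JSON text instead of casting it, marking missing values invalid in the validity mask; since serialization cannot fail, it unconditionally returns true. A self-contained sketch of the same null-preserving batch copy (plain C++ containers standing in for DuckDB's `FlatVector` data and validity mask):

```cpp
#include <string>
#include <vector>

// Sketch: copy a batch of nullable, already-serialized values into an output
// column, tracking nulls in a parallel validity mask.
void TransformToStrings(const std::vector<const std::string *> &vals,
                        std::vector<std::string> &out, std::vector<bool> &valid) {
	out.assign(vals.size(), "");
	valid.assign(vals.size(), true);
	for (size_t i = 0; i < vals.size(); i++) {
		if (!vals[i]) {
			valid[i] = false; // null in, null out; nothing is written
		} else {
			out[i] = *vals[i]; // real code: serialize the yyjson value here
		}
	}
	// like the diff's TransformToJSON, this can never fail
}
```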
package/src/duckdb/extension/json/json_functions/read_json.cpp
CHANGED
@@ -13,32 +13,17 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
 	JSONScanLocalState lstate(context, gstate);
 	ArenaAllocator allocator(BufferAllocator::Get(context));
 
-	static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
-	    {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
-	    {LogicalTypeId::TIMESTAMP,
-	     {"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
-	      "%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"}},
-	};
-
-	// Populate possible date/timestamp formats, assume this is consistent across columns
-	for (auto &kv : FORMAT_TEMPLATES) {
-		const auto &type = kv.first;
-		if (bind_data.date_format_map.HasFormats(type)) {
-			continue; // Already populated
-		}
-		const auto &format_strings = kv.second;
-		for (auto &format_string : format_strings) {
-			bind_data.date_format_map.AddFormat(type, format_string);
-		}
-	}
-
 	// Read for the specified sample size
 	JSONStructureNode node;
+	bool more_than_one = false;
 	Vector string_vector(LogicalType::VARCHAR);
 	idx_t remaining = bind_data.sample_size;
 	while (remaining != 0) {
 		allocator.Reset();
 		auto read_count = lstate.ReadNext(gstate);
+		if (read_count > 1) {
+			more_than_one = true;
+		}
 		if (read_count == 0) {
 			break;
 		}
@@ -54,15 +39,29 @@ void JSONScan::AutoDetect(ClientContext &context, JSONScanData &bind_data, vecto
 		node.InitializeCandidateTypes(bind_data.max_depth);
 		node.RefineCandidateTypes(lstate.objects, next, string_vector, allocator, bind_data.date_format_map);
 		remaining -= next;
+
+		if (gstate.file_index == 10) {
+			// We really shouldn't open more than 10 files when sampling
+			break;
+		}
 	}
 	bind_data.type = original_scan_type;
 	bind_data.transform_options.date_format_map = &bind_data.date_format_map;
 
-
+	auto type = JSONStructure::StructureToType(context, node, bind_data.max_depth);
+	if (type.id() == LogicalTypeId::STRUCT) {
+		bind_data.top_level_type = JSONScanTopLevelType::OBJECTS;
+	} else if (!more_than_one && type.id() == LogicalTypeId::LIST &&
+	           ListType::GetChildType(type).id() == LogicalTypeId::STRUCT) {
+		bind_data.top_level_type = JSONScanTopLevelType::ARRAY_OF_OBJECTS;
+		bind_data.options.format = JSONFormat::UNSTRUCTURED;
+		type = ListType::GetChildType(type);
+	}
+
 	if (type.id() != LogicalTypeId::STRUCT) {
 		return_types.emplace_back(type);
 		names.emplace_back("json");
-		bind_data.
+		bind_data.top_level_type = JSONScanTopLevelType::OTHER;
 	} else {
 		const auto &child_types = StructType::GetChildTypes(type);
 		return_types.reserve(child_types.size());
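The rewritten tail of `AutoDetect` classifies the sampled top-level shape: a STRUCT keeps the normal object scan, a single top-level LIST of STRUCTs becomes `ARRAY_OF_OBJECTS` (the scan switches to unstructured and unwraps the element type), and anything else becomes `OTHER` and is returned as one `json` column. A hedged sketch of that decision rule with toy types in place of DuckDB's:

```cpp
// `more_than_one` is true when any sample batch held more than one top-level
// value (in which case the input cannot be one wrapping array).
enum class TopLevelType { OBJECTS, ARRAY_OF_OBJECTS, OTHER };
enum class Kind { STRUCT_T, LIST_T, SCALAR_T };

struct DetectedType {
	Kind kind;
	const DetectedType *child = nullptr; // element type when kind == LIST_T
};

TopLevelType Classify(const DetectedType &type, bool more_than_one) {
	if (type.kind == Kind::STRUCT_T) {
		return TopLevelType::OBJECTS; // e.g. NDJSON, one object per line
	}
	if (!more_than_one && type.kind == Kind::LIST_T && type.child &&
	    type.child->kind == Kind::STRUCT_T) {
		return TopLevelType::ARRAY_OF_OBJECTS; // unwrap to the element struct
	}
	return TopLevelType::OTHER; // surfaces as a single "json" column
}
```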
@@ -189,9 +188,11 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
 	auto &gstate = ((JSONGlobalTableFunctionState &)*data_p.global_state).state;
 	auto &lstate = ((JSONLocalTableFunctionState &)*data_p.local_state).state;
 
-	// Fetch next lines
 	const auto count = lstate.ReadNext(gstate);
-	const auto objects =
+	const auto objects = gstate.bind_data.top_level_type == JSONScanTopLevelType::ARRAY_OF_OBJECTS
+	                         ? lstate.array_objects
+	                         : lstate.objects;
+	output.SetCardinality(count);
 
 	vector<Vector *> result_vectors;
 	result_vectors.reserve(output.ColumnCount());
@@ -202,13 +203,14 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
 
 	// Pass current reader to transform options so we can get line number information if an error occurs
 	bool success;
-	if (gstate.bind_data.
-		success = JSONTransform::TransformObject(objects, lstate.GetAllocator(), count, gstate.bind_data.names,
-		                                         result_vectors, lstate.transform_options);
-	} else {
+	if (gstate.bind_data.top_level_type == JSONScanTopLevelType::OTHER) {
 		success = JSONTransform::Transform(objects, lstate.GetAllocator(), *result_vectors[0], count,
 		                                   lstate.transform_options);
+	} else {
+		success = JSONTransform::TransformObject(objects, lstate.GetAllocator(), count, gstate.bind_data.names,
+		                                         result_vectors, lstate.transform_options);
 	}
+
 	if (!success) {
 		string hint = gstate.bind_data.auto_detect
 		                  ? "\nTry increasing 'sample_size', reducing 'maximum_depth', specifying 'columns' manually, "
@@ -217,7 +219,6 @@ static void ReadJSONFunction(ClientContext &context, TableFunctionInput &data_p,
 		lstate.ThrowTransformError(count, lstate.transform_options.object_index,
 		                           lstate.transform_options.error_message + hint);
 	}
-	output.SetCardinality(count);
 }
 
 TableFunction JSONFunctions::GetReadJSONTableFunction(bool list_parameter, shared_ptr<JSONScanInfo> function_info) {
@@ -235,6 +236,7 @@ TableFunction JSONFunctions::GetReadJSONTableFunction(bool list_parameter, share
 	table_function.named_parameters["timestamp_format"] = LogicalType::VARCHAR;
 
 	table_function.projection_pushdown = true;
+	// TODO: might be able to do filter pushdown/prune too
 
 	table_function.function_info = std::move(function_info);
 
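In `ReadJSONFunction`, both the batch source and the transform entry point now depend on the top-level type: `ARRAY_OF_OBJECTS` reads the unnested `array_objects`, `OTHER` funnels everything into the single `json` column, and object shapes route keys to the bound columns. A toy sketch of that routing (maps standing in for yyjson objects, string columns for vectors; illustrative only):

```cpp
#include <map>
#include <string>
#include <vector>

// Toy stand-ins: a parsed object is a key -> serialized-value map; a result
// column is a vector of strings. `columns` is pre-sized to the column count.
using ToyObject = std::map<std::string, std::string>;

void TransformBatch(bool top_level_is_other, const std::vector<ToyObject> &batch,
                    const std::vector<std::string> &column_names,
                    std::vector<std::vector<std::string>> &columns) {
	if (top_level_is_other) {
		// Single "json" column: re-serialize each top-level value verbatim
		for (const auto &obj : batch) {
			std::string text = "{";
			for (const auto &kv : obj) {
				if (text.size() > 1) {
					text += ",";
				}
				text += "\"" + kv.first + "\":" + kv.second;
			}
			columns[0].push_back(text + "}");
		}
		return;
	}
	// Object shape: route each key to its projected output column
	for (const auto &obj : batch) {
		for (size_t col = 0; col < column_names.size(); col++) {
			auto it = obj.find(column_names[col]);
			columns[col].push_back(it == obj.end() ? "NULL" : it->second);
		}
	}
}
```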
package/src/duckdb/extension/json/json_functions.cpp
CHANGED
@@ -166,6 +166,12 @@ vector<CreateTableFunctionInfo> JSONFunctions::GetTableFunctions() {
 unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context, const string &table_name,
                                                         ReplacementScanData *data) {
 	auto lower_name = StringUtil::Lower(table_name);
+	// remove any compression
+	if (StringUtil::EndsWith(lower_name, ".gz")) {
+		lower_name = lower_name.substr(0, lower_name.size() - 3);
+	} else if (StringUtil::EndsWith(lower_name, ".zst")) {
+		lower_name = lower_name.substr(0, lower_name.size() - 4);
+	}
 	if (!StringUtil::EndsWith(lower_name, ".json") && !StringUtil::Contains(lower_name, ".json?") &&
 	    !StringUtil::EndsWith(lower_name, ".ndjson") && !StringUtil::Contains(lower_name, ".ndjson?")) {
 		return nullptr;
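The replacement scan now peels off a trailing compression suffix before checking for the JSON extensions, so names like `data.json.gz` or `logs.ndjson.zst` resolve. A small standalone sketch of the suffix handling:

```cpp
#include <string>

// Sketch: drop a known compression suffix, then test the real extension,
// mirroring the ReadJSONReplacement change above.
static bool EndsWith(const std::string &s, const std::string &suffix) {
	return s.size() >= suffix.size() &&
	       s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

bool LooksLikeJSONFile(std::string name) {
	if (EndsWith(name, ".gz")) {
		name.resize(name.size() - 3);
	} else if (EndsWith(name, ".zst")) {
		name.resize(name.size() - 4);
	}
	return EndsWith(name, ".json") || EndsWith(name, ".ndjson");
}
// LooksLikeJSONFile("data.json.gz") == true; LooksLikeJSONFile("data.csv") == false
```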
package/src/duckdb/extension/json/json_scan.cpp
CHANGED
@@ -48,8 +48,11 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
 			options.format = JSONFormat::UNSTRUCTURED;
 		} else if (format == "newline_delimited") {
 			options.format = JSONFormat::NEWLINE_DELIMITED;
+		} else if (format == "array_of_objects") {
+			result->top_level_type = JSONScanTopLevelType::ARRAY_OF_OBJECTS;
 		} else {
-			throw BinderException(
+			throw BinderException(
+			    "format must be one of ['auto', 'unstructured', 'newline_delimited', 'array_of_objects']");
 		}
 	} else if (loption == "compression") {
 		auto compression = StringUtil::Lower(StringValue::Get(kv.second));
@@ -67,6 +70,10 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
 		}
 	}
 
+	if (result->top_level_type == JSONScanTopLevelType::ARRAY_OF_OBJECTS) {
+		result->options.format = JSONFormat::UNSTRUCTURED;
+	}
+
 	return std::move(result);
 }
 
@@ -98,6 +105,27 @@ void JSONScanData::InitializeFormats() {
 	if (!timestamp_format.empty()) {
 		date_format_map.AddFormat(LogicalTypeId::TIMESTAMP, timestamp_format);
 	}
+
+	if (auto_detect) {
+		static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
+		    {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
+		    {LogicalTypeId::TIMESTAMP,
+		     {"%Y-%m-%d %H:%M:%S.%f", "%m-%d-%Y %I:%M:%S %p", "%m-%d-%y %I:%M:%S %p", "%d-%m-%Y %H:%M:%S",
+		      "%d-%m-%y %H:%M:%S", "%Y-%m-%d %H:%M:%S", "%y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"}},
+		};
+
+		// Populate possible date/timestamp formats, assume this is consistent across columns
+		for (auto &kv : FORMAT_TEMPLATES) {
+			const auto &type = kv.first;
+			if (date_format_map.HasFormats(type)) {
+				continue; // Already populated
+			}
+			const auto &format_strings = kv.second;
+			for (auto &format_string : format_strings) {
+				date_format_map.AddFormat(type, format_string);
+			}
+		}
+	}
 }
 
 void JSONScanData::Serialize(FieldWriter &writer) {
@@ -112,9 +140,17 @@ void JSONScanData::Serialize(FieldWriter &writer) {
 	writer.WriteList<string>(names);
 	writer.WriteList<idx_t>(valid_cols);
 	writer.WriteField<idx_t>(max_depth);
-	writer.WriteField<
-
-
+	writer.WriteField<JSONScanTopLevelType>(top_level_type);
+	if (!date_format.empty()) {
+		writer.WriteString(date_format);
+	} else {
+		writer.WriteString(date_format_map.GetFormat(LogicalTypeId::DATE).format_specifier);
+	}
+	if (!timestamp_format.empty()) {
+		writer.WriteString(timestamp_format);
+	} else {
+		writer.WriteString(date_format_map.GetFormat(LogicalTypeId::TIMESTAMP).format_specifier);
+	}
 }
 
 void JSONScanData::Deserialize(FieldReader &reader) {
@@ -129,9 +165,12 @@ void JSONScanData::Deserialize(FieldReader &reader) {
 	names = reader.ReadRequiredList<string>();
 	valid_cols = reader.ReadRequiredList<idx_t>();
 	max_depth = reader.ReadRequired<idx_t>();
-
+	top_level_type = reader.ReadRequired<JSONScanTopLevelType>();
 	date_format = reader.ReadRequired<string>();
 	timestamp_format = reader.ReadRequired<string>();
+
+	InitializeFormats();
+	transform_options.date_format_map = &date_format_map;
 }
 
 JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, JSONScanData &bind_data_p)
@@ -150,9 +189,9 @@ JSONScanGlobalState::JSONScanGlobalState(ClientContext &context, JSONScanData &b
 }
 
 JSONScanLocalState::JSONScanLocalState(ClientContext &context, JSONScanGlobalState &gstate)
-    : batch_index(DConstants::INVALID_INDEX), bind_data(gstate.bind_data),
+    : scan_count(0), array_idx(0), array_offset(0), batch_index(DConstants::INVALID_INDEX), bind_data(gstate.bind_data),
       json_allocator(BufferAllocator::Get(context)), current_reader(nullptr), current_buffer_handle(nullptr),
-      buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) {
+      is_last(false), buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) {
 
 	// Buffer to reconstruct JSON objects when they cross a buffer boundary
 	reconstruct_buffer = gstate.allocator.Allocate(gstate.bind_data.maximum_object_size + YYJSON_PADDING_SIZE);
@@ -174,11 +213,6 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
 	// Perform projection pushdown
 	if (bind_data.type == JSONScanType::READ_JSON) {
 		D_ASSERT(input.column_ids.size() <= bind_data.names.size()); // Can't project to have more columns
-		if (bind_data.auto_detect && input.column_ids.size() < bind_data.names.size()) {
-			// If we are auto-detecting, but don't need all columns present in the file,
-			// then we don't need to throw an error if we encounter an unseen column
-			bind_data.transform_options.error_unknown_key = false;
-		}
 		vector<string> names;
 		names.reserve(input.column_ids.size());
 		for (idx_t i = 0; i < input.column_ids.size(); i++) {
@@ -189,6 +223,11 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
 			names.push_back(std::move(bind_data.names[id]));
 			bind_data.valid_cols.push_back(i);
 		}
+		if (names.size() < bind_data.names.size()) {
+			// If we are auto-detecting, but don't need all columns present in the file,
+			// then we don't need to throw an error if we encounter an unseen column
+			bind_data.transform_options.error_unknown_key = false;
+		}
 		bind_data.names = std::move(names);
 	}
 	return result;
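The relocated projection-pushdown logic relaxes the unknown-key error whenever fewer columns are projected than were detected, not only under auto-detection as before. A toy sketch of that relaxation (simplified types; `error_unknown_key` mirrors the transform option of the same name):

```cpp
#include <cstddef>
#include <set>
#include <stdexcept>
#include <string>

struct ToyTransformOptions {
	bool error_unknown_key = true;
};

// After projection pushdown: if the query needs fewer columns than were
// detected, keys skipped by the projection must not count as errors.
void ApplyProjection(size_t projected_count, size_t detected_count,
                     ToyTransformOptions &options) {
	if (projected_count < detected_count) {
		options.error_unknown_key = false;
	}
}

void CheckKey(const std::string &key, const std::set<std::string> &projected,
              const ToyTransformOptions &options) {
	if (!projected.count(key) && options.error_unknown_key) {
		throw std::runtime_error("Object contains unknown key \"" + key + "\"");
	}
}
```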
@@ -231,6 +270,10 @@ static inline void SkipWhitespace(const char *buffer_ptr, idx_t &buffer_offset,
 idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
 	json_allocator.Reset();
 
+	if (gstate.bind_data.top_level_type == JSONScanTopLevelType::ARRAY_OF_OBJECTS && array_idx < scan_count) {
+		return GetObjectsFromArray();
+	}
+
 	idx_t count = 0;
 	if (buffer_offset == buffer_size) {
 		if (!ReadNextBuffer(gstate)) {
@@ -254,10 +297,20 @@ idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) {
 	default:
 		throw InternalException("Unknown JSON format");
 	}
+	scan_count = count;
 
 	// Skip over any remaining whitespace for the next scan
 	SkipWhitespace(buffer_ptr, buffer_offset, buffer_size);
 
+	if (gstate.bind_data.top_level_type == JSONScanTopLevelType::ARRAY_OF_OBJECTS) {
+		if (scan_count > 1) {
+			throw InvalidInputException("File must have exactly one array of objects when format='array_of_objects'");
+		}
+		array_idx = 0;
+		array_offset = 0;
+		return GetObjectsFromArray();
+	}
+
 	return count;
 }
 
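`ReadNext` now keeps a cursor into the last parsed top-level array (`array_idx`/`array_offset`) and drains it across calls before reading any new input, since one array can hold far more elements than a single vector; the loop itself appears in `GetObjectsFromArray` below. A self-contained sketch of the resumable drain (a toy batch size stands in for STANDARD_VECTOR_SIZE):

```cpp
#include <cstddef>
#include <vector>

constexpr size_t kBatchSize = 4; // stands in for STANDARD_VECTOR_SIZE

// Toy state: parsed top-level arrays plus a cursor, like array_idx /
// array_offset in the diff. NextBatch emits up to kBatchSize elements and
// remembers where it stopped, so the next call resumes mid-array.
struct ArrayScanState {
	std::vector<std::vector<int>> parsed;
	size_t array_idx = 0;    // which array is being drained
	size_t array_offset = 0; // position inside that array

	size_t NextBatch(std::vector<int> &out) {
		out.clear();
		for (; array_idx < parsed.size(); array_idx++, array_offset = 0) {
			const auto &arr = parsed[array_idx];
			while (array_offset < arr.size() && out.size() < kBatchSize) {
				out.push_back(arr[array_offset++]);
			}
			if (out.size() == kBatchSize) {
				break; // batch full; cursor stays inside the current array
			}
		}
		return out.size();
	}
};
// A caller checks the cursor first, like the diff's early return in ReadNext,
// and only reads new input once the cursor is exhausted.
```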
@@ -332,10 +385,39 @@ yyjson_val *JSONScanLocalState::ParseLine(char *line_start, idx_t line_size, idx
 	}
 }
 
+idx_t JSONScanLocalState::GetObjectsFromArray() {
+	idx_t arr_count = 0;
+
+	size_t idx, max;
+	yyjson_val *val;
+	for (; array_idx < scan_count; array_idx++, array_offset = 0) {
+		if (objects[array_idx]) {
+			yyjson_arr_foreach(objects[array_idx], idx, max, val) {
+				if (idx < array_offset) {
+					continue;
+				}
+				array_objects[arr_count++] = val;
+				if (arr_count == STANDARD_VECTOR_SIZE) {
+					break;
+				}
+			}
+			array_offset = idx + 1;
+			if (arr_count == STANDARD_VECTOR_SIZE) {
+				break;
+			}
+		}
+	}
+	return arr_count;
+}
+
 bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
 	if (current_reader) {
 		D_ASSERT(current_buffer_handle);
 		current_reader->SetBufferLineOrObjectCount(current_buffer_handle->buffer_index, lines_or_objects_in_buffer);
+		if (is_last && gstate.bind_data.type != JSONScanType::SAMPLE) {
+			// Close files that are done if we're not sampling
+			current_reader->CloseJSONFile();
+		}
 	}
 
 	AllocatedData buffer;
@@ -396,7 +478,9 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
 		// Unopened file
 		current_reader->OpenJSONFile();
 		batch_index = gstate.batch_index++;
-		if (options.format == JSONFormat::UNSTRUCTURED
+		if (options.format == JSONFormat::UNSTRUCTURED || (options.format == JSONFormat::NEWLINE_DELIMITED &&
+		                                                   options.compression != FileCompressionType::UNCOMPRESSED &&
+		                                                   gstate.file_index < gstate.json_readers.size())) {
 			gstate.file_index++; // UNSTRUCTURED necessitates single-threaded read
 		}
 		if (options.format != JSONFormat::AUTO_DETECT) {
@@ -450,9 +534,6 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) {
 	auto json_buffer_handle = make_unique<JSONBufferHandle>(buffer_index, readers, std::move(buffer), buffer_size);
 	current_buffer_handle = json_buffer_handle.get();
 	current_reader->InsertBuffer(buffer_index, std::move(json_buffer_handle));
-	if (!current_reader->GetFileHandle().PlainFileSource() && gstate.bind_data.type == JSONScanType::SAMPLE) {
-		// TODO: store buffer
-	}
 
 	buffer_offset = 0;
 	prev_buffer_remainder = 0;
@@ -508,16 +589,18 @@ void JSONScanLocalState::ReadNextBufferSeek(JSONScanGlobalState &gstate, idx_t &
 }
 
 void JSONScanLocalState::ReadNextBufferNoSeek(JSONScanGlobalState &gstate, idx_t &buffer_index) {
-	auto &file_handle = current_reader->GetFileHandle();
-
 	idx_t request_size = gstate.buffer_capacity - prev_buffer_remainder - YYJSON_PADDING_SIZE;
 	idx_t read_size;
 	{
 		lock_guard<mutex> reader_guard(current_reader->lock);
 		buffer_index = current_reader->GetBufferIndex();
 
-
-
+		if (current_reader->IsOpen()) {
+			read_size = current_reader->GetFileHandle().Read(buffer_ptr + prev_buffer_remainder, request_size,
+			                                                 gstate.bind_data.type == JSONScanType::SAMPLE);
+		} else {
+			read_size = 0;
+		}
 	}
 	is_last = read_size < request_size;
 
 	if (!gstate.bind_data.ignore_errors && read_size == 0 && prev_buffer_remainder != 0) {
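With files now closed eagerly (`CloseJSONFile` once the last buffer of a file has been handed out, unless sampling), another thread can still reach the same reader afterwards; `ReadNextBufferNoSeek` therefore treats a closed reader as a zero-byte read, which ends that thread's scan cleanly. A toy sketch of the guard (illustrative types only):

```cpp
#include <cstddef>
#include <mutex>

// Toy reader: once another thread has closed the underlying file, report a
// zero-byte read so this thread's scan ends cleanly.
struct ToyStreamReader {
	std::mutex lock;
	bool open = true;
	size_t remaining = 1024; // pretend bytes left in the stream

	size_t ReadChunk(size_t request_size) {
		std::lock_guard<std::mutex> guard(lock);
		if (!open) {
			return 0; // closed elsewhere: nothing more to read
		}
		size_t read_size = request_size < remaining ? request_size : remaining;
		remaining -= read_size;
		return read_size;
	}
};
// The caller then sets `is_last = read_size < request_size`, as in the diff.
```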
@@ -583,6 +666,11 @@ void JSONScanLocalState::ReconstructFirstObject(JSONScanGlobalState &gstate) {
 }
 
 void JSONScanLocalState::ReadUnstructured(idx_t &count) {
+	// yyjson does not always return YYJSON_READ_ERROR_UNEXPECTED_END properly
+	// if a different error code happens within the last 50 bytes
+	// we assume it should be YYJSON_READ_ERROR_UNEXPECTED_END instead
+	static constexpr idx_t END_BOUND = 50;
+
 	const auto max_obj_size = reconstruct_buffer.GetSize();
 	yyjson_read_err error;
 	for (; count < STANDARD_VECTOR_SIZE; count++) {
@@ -608,8 +696,7 @@ void JSONScanLocalState::ReadUnstructured(idx_t &count) {
 		} else if (error.pos > max_obj_size) {
 			current_reader->ThrowParseError(current_buffer_handle->buffer_index, lines_or_objects_in_buffer, error,
 			                                "Try increasing \"maximum_object_size\".");
-
-		} else if (error.code == YYJSON_READ_ERROR_UNEXPECTED_END && !is_last) {
+		} else if (!is_last && (error.code == YYJSON_READ_ERROR_UNEXPECTED_END || remaining - error.pos < END_BOUND)) {
 			// Copy remaining to reconstruct_buffer
 			const auto reconstruct_ptr = reconstruct_buffer.get();
 			memcpy(reconstruct_ptr, obj_copy_start, remaining);
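The new `END_BOUND` constant works around yyjson not always reporting `YYJSON_READ_ERROR_UNEXPECTED_END` for objects cut off at the buffer edge: any parse error within the last 50 bytes of a non-final buffer is treated as truncation and the tail is carried over for re-parsing. A sketch of that classification (names illustrative):

```cpp
#include <cstddef>

constexpr size_t kEndBound = 50; // the diff's END_BOUND

enum class ParseErr { NONE, UNEXPECTED_END, OTHER };

// Decide whether a parse error means "object truncated by the buffer edge":
// an explicit UNEXPECTED_END, or any error within kEndBound bytes of the end
// of a non-final buffer, is retried once more data has been read.
bool ShouldRetryWithMoreData(ParseErr code, size_t error_pos, size_t remaining,
                             bool is_last_buffer) {
	if (is_last_buffer) {
		return false; // no more data is coming; report the error
	}
	return code == ParseErr::UNEXPECTED_END || remaining - error_pos < kEndBound;
}
```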
package/src/duckdb/src/common/types.cpp
CHANGED
@@ -504,11 +504,32 @@ LogicalType TransformStringToLogicalType(const string &str) {
 	return Parser::ParseColumnList("dummy " + str).GetColumn(LogicalIndex(0)).Type();
 }
 
+LogicalType GetUserTypeRecursive(const LogicalType &type, ClientContext &context) {
+	if (type.id() == LogicalTypeId::USER && type.HasAlias()) {
+		return Catalog::GetSystemCatalog(context).GetType(context, SYSTEM_CATALOG, DEFAULT_SCHEMA, type.GetAlias());
+	}
+	// Look for LogicalTypeId::USER in nested types
+	if (type.id() == LogicalTypeId::STRUCT) {
+		child_list_t<LogicalType> children;
+		children.reserve(StructType::GetChildCount(type));
+		for (auto &child : StructType::GetChildTypes(type)) {
+			children.emplace_back(child.first, GetUserTypeRecursive(child.second, context));
+		}
+		return LogicalType::STRUCT(std::move(children));
+	}
+	if (type.id() == LogicalTypeId::LIST) {
+		return LogicalType::LIST(GetUserTypeRecursive(ListType::GetChildType(type), context));
+	}
+	if (type.id() == LogicalTypeId::MAP) {
+		return LogicalType::MAP(GetUserTypeRecursive(MapType::KeyType(type), context),
+		                        GetUserTypeRecursive(MapType::ValueType(type), context));
+	}
+	// Not LogicalTypeId::USER or a nested type
+	return type;
+}
+
 LogicalType TransformStringToLogicalType(const string &str, ClientContext &context) {
-
-	return type.id() == LogicalTypeId::USER
-	           ? Catalog::GetSystemCatalog(context).GetType(context, SYSTEM_CATALOG, DEFAULT_SCHEMA, str)
-	           : type;
+	return GetUserTypeRecursive(TransformStringToLogicalType(str), context);
 }
 
 bool LogicalType::IsIntegral() const {
@@ -888,18 +909,23 @@ void LogicalType::SetAlias(string alias) {
 }
 
 string LogicalType::GetAlias() const {
-	if (
-		return
-	}
+	if (id() == LogicalTypeId::USER) {
+		return UserType::GetTypeName(*this);
+	}
+	if (type_info_) {
 		return type_info_->alias;
 	}
+	return string();
 }
 
 bool LogicalType::HasAlias() const {
-	if (
-		return
+	if (id() == LogicalTypeId::USER) {
+		return !UserType::GetTypeName(*this).empty();
+	}
+	if (type_info_ && !type_info_->alias.empty()) {
+		return true;
 	}
-	return
+	return false;
 }
 
 void LogicalType::SetCatalog(LogicalType &type, TypeCatalogEntry *catalog_entry) {
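`GetUserTypeRecursive` means a named (USER) type is now resolved even when it is nested inside STRUCT, LIST, or MAP types, by rebuilding the type tree and swapping each USER node for its catalog definition. A hedged sketch of the same recursion over a toy type model (`ResolveNamed` is a stand-in for the catalog lookup; MAP would be handled analogously):

```cpp
#include <string>
#include <vector>

struct ToyType {
	enum Kind { USER, STRUCT_T, LIST_T, SCALAR } kind = SCALAR;
	std::string name;              // set when kind == USER
	std::vector<ToyType> children; // members (STRUCT_T) or element (LIST_T)
};

ToyType ResolveNamed(const std::string &name) {
	(void)name;
	return ToyType{}; // stub: the real code asks the system catalog
}

ToyType ResolveUserTypes(const ToyType &type) {
	if (type.kind == ToyType::USER) {
		return ResolveNamed(type.name); // leaf replacement
	}
	if (type.kind == ToyType::STRUCT_T || type.kind == ToyType::LIST_T) {
		ToyType result = type;
		for (auto &child : result.children) {
			child = ResolveUserTypes(child); // recurse into nested types
		}
		return result;
	}
	return type; // plain scalar: nothing to resolve
}
```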
package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp
CHANGED
@@ -721,6 +721,9 @@ void BufferedCSVReader::DetectHeader(const vector<vector<LogicalType>> &best_sql
 			names.push_back(column_name);
 		}
 	}
+	for (idx_t i = 0; i < MinValue<idx_t>(names.size(), options.name_list.size()); i++) {
+		names[i] = options.name_list[i];
+	}
 }
 
 vector<LogicalType> BufferedCSVReader::RefineTypeDetection(const vector<LogicalType> &type_candidates,
package/src/duckdb/src/function/table/read_csv.cpp
CHANGED
@@ -99,6 +99,17 @@ static unique_ptr<FunctionData> ReadCSVBind(ClientContext &context, TableFunctio
 			if (names.empty()) {
 				throw BinderException("read_csv requires at least a single column as input!");
 			}
+		} else if (loption == "column_names" || loption == "names") {
+			if (!options.name_list.empty()) {
+				throw BinderException("read_csv_auto column_names/names can only be supplied once");
+			}
+			if (kv.second.IsNull()) {
+				throw BinderException("read_csv_auto %s cannot be NULL", kv.first);
+			}
+			auto &children = ListValue::GetChildren(kv.second);
+			for (auto &child : children) {
+				options.name_list.push_back(StringValue::Get(child));
+			}
 		} else if (loption == "column_types" || loption == "types" || loption == "dtypes") {
 			auto &child_type = kv.second.type();
 			if (child_type.id() != LogicalTypeId::STRUCT && child_type.id() != LogicalTypeId::LIST) {
@@ -961,6 +972,8 @@ TableFunction ReadCSVTableFunction::GetAutoFunction(bool list_parameter) {
 	read_csv_auto.named_parameters["column_types"] = LogicalType::ANY;
 	read_csv_auto.named_parameters["dtypes"] = LogicalType::ANY;
 	read_csv_auto.named_parameters["types"] = LogicalType::ANY;
+	read_csv_auto.named_parameters["names"] = LogicalType::LIST(LogicalType::VARCHAR);
+	read_csv_auto.named_parameters["column_names"] = LogicalType::LIST(LogicalType::VARCHAR);
 	return read_csv_auto;
 }
 
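The new `names`/`column_names` parameter threads a user-supplied list into `BufferedCSVReader::DetectHeader`, which overwrites the first min(detected, supplied) detected names, so a partial list renames only the leading columns. A minimal sketch of that override (helper name is illustrative):

```cpp
#include <algorithm>
#include <string>
#include <vector>

// Sketch: keep the auto-detected header, then overlay the user's names onto
// the leading columns, exactly as the DetectHeader loop above does.
void ApplyNameList(std::vector<std::string> &detected,
                   const std::vector<std::string> &name_list) {
	const size_t n = std::min(detected.size(), name_list.size());
	for (size_t i = 0; i < n; i++) {
		detected[i] = name_list[i];
	}
}
// e.g. detected = {"column0", "column1", "column2"}, name_list = {"id", "ts"}
// -> {"id", "ts", "column2"}
```

At the SQL level this surfaces as, e.g., read_csv_auto('file.csv', names=['id', 'ts']).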
package/src/duckdb/src/function/table/version/pragma_version.cpp
CHANGED
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.7.1-dev107"
+#define DUCKDB_VERSION "0.7.1-dev137"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "
+#define DUCKDB_SOURCE_ID "eb65c593fe"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp
CHANGED
@@ -75,6 +75,8 @@ struct BufferedCSVReaderOptions {
 	case_insensitive_map_t<idx_t> sql_types_per_column;
 	//! User-defined SQL type list
 	vector<LogicalType> sql_type_list;
+	//! User-defined name list
+	vector<string> name_list;
 	//===--------------------------------------------------------------------===//
 	// ReadCSVOptions
 	//===--------------------------------------------------------------------===//
package/src/duckdb/src/include/duckdb/main/client_data.hpp
CHANGED
@@ -11,7 +11,7 @@
 #include "duckdb/common/common.hpp"
 #include "duckdb/common/enums/output_type.hpp"
 #include "duckdb/common/types/value.hpp"
-#include "duckdb/common/
+#include "duckdb/common/case_insensitive_map.hpp"
 #include "duckdb/common/atomic.hpp"
 
 namespace duckdb {
@@ -39,7 +39,7 @@ struct ClientData {
 	//! The set of temporary objects that belong to this client
 	shared_ptr<AttachedDatabase> temporary_objects;
 	//! The set of bound prepared statements that belong to this client
-
+	case_insensitive_map_t<shared_ptr<PreparedStatementData>> prepared_statements;
 
 	//! The writer used to log queries (if logging is enabled)
 	unique_ptr<BufferedFileWriter> log_query_writer;
package/src/duckdb/src/parser/statement/copy_statement.cpp
CHANGED
@@ -11,16 +11,6 @@ CopyStatement::CopyStatement(const CopyStatement &other) : SQLStatement(other),
 	}
 }
 
-string ConvertOptionValueToString(const Value &val) {
-	auto type = val.type().id();
-	switch (type) {
-	case LogicalTypeId::VARCHAR:
-		return KeywordHelper::WriteOptionallyQuoted(val.ToString());
-	default:
-		return val.ToString();
-	}
-}
-
 string CopyStatement::CopyOptionsToString(const string &format,
                                           const case_insensitive_map_t<vector<Value>> &options) const {
 	if (format.empty() && options.empty()) {
@@ -45,15 +35,14 @@ string CopyStatement::CopyOptionsToString(const string &format,
 		// Options like HEADER don't need an explicit value
 		// just providing the name already sets it to true
 	} else if (values.size() == 1) {
-		result +=
+		result += values[0].ToSQLString();
 	} else {
 		result += "( ";
 		for (idx_t i = 0; i < values.size(); i++) {
-			auto &value = values[i];
 			if (i) {
 				result += ", ";
 			}
-			result +=
+			result += values[i].ToSQLString();
 		}
 		result += " )";
 	}
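Replacing `ConvertOptionValueToString` with `Value::ToSQLString` makes copy options round-trip as proper SQL literals (quoted and escaped) rather than optionally-quoted keywords. A minimal string-literal version of the idea, assuming standard single-quote escaping:

```cpp
#include <string>

// Sketch: render a string as a SQL literal so it can be re-parsed safely,
// unlike the old keyword-style optional quoting.
std::string ToSQLLiteral(const std::string &value) {
	std::string result = "'";
	for (char c : value) {
		if (c == '\'') {
			result += "''"; // escape embedded quotes SQL-style
		} else {
			result += c;
		}
	}
	result += "'";
	return result;
}
// ToSQLLiteral("it's") == "'it''s'", safe inside COPY ... ( FORMAT 'csv' )
```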
package/src/duckdb/src/parser/transform/expression/transform_case.cpp
CHANGED
@@ -9,16 +9,16 @@ unique_ptr<ParsedExpression> Transformer::TransformCase(duckdb_libpgquery::PGCas
 	D_ASSERT(root);
 
 	auto case_node = make_unique<CaseExpression>();
+	auto root_arg = TransformExpression(reinterpret_cast<duckdb_libpgquery::PGNode *>(root->arg));
 	for (auto cell = root->args->head; cell != nullptr; cell = cell->next) {
 		CaseCheck case_check;
 
 		auto w = reinterpret_cast<duckdb_libpgquery::PGCaseWhen *>(cell->data.ptr_value);
 		auto test_raw = TransformExpression(reinterpret_cast<duckdb_libpgquery::PGNode *>(w->expr));
 		unique_ptr<ParsedExpression> test;
-
-		if (arg) {
+		if (root_arg) {
 			case_check.when_expr =
-			    make_unique<ComparisonExpression>(ExpressionType::COMPARE_EQUAL,
+			    make_unique<ComparisonExpression>(ExpressionType::COMPARE_EQUAL, root_arg->Copy(), std::move(test_raw));
 		} else {
 			case_check.when_expr = std::move(test_raw);
 		}