npm - duckdb - Versions diffs - 0.7.1-dev90.0 → 0.7.2-dev0.0 - Mend

duckdb 0.7.1-dev90.0 → 0.7.2-dev0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # DuckDB Node Bindings
-This package provides a node.js API for [DuckDB](https://github.com/cwida/duckdb), the "SQLite for Analytics". The API for this client is somewhat compliant to the SQLite node.js client for easier transition (and transition you must eventually).
+This package provides a node.js API for [DuckDB](https://github.com/duckdb/duckdb), the "SQLite for Analytics". The API for this client is somewhat compliant to the SQLite node.js client for easier transition (and transition you must eventually).
 Load the package and create a database object:

package/binding.gyp CHANGED Viewed

@@ -222,16 +222,16 @@
                 "src/duckdb/third_party/zstd/compress/zstd_lazy.cpp",
                 "src/duckdb/third_party/zstd/compress/zstd_ldm.cpp",
                 "src/duckdb/third_party/zstd/compress/zstd_opt.cpp",
-                "src/duckdb/extension/icu/./icu-timezone.cpp",
-                "src/duckdb/extension/icu/./icu-makedate.cpp",
-                "src/duckdb/extension/icu/./icu-datepart.cpp",
-                "src/duckdb/extension/icu/./icu-datesub.cpp",
+                "src/duckdb/extension/icu/./icu-dateadd.cpp",
                 "src/duckdb/extension/icu/./icu-datetrunc.cpp",
-                "src/duckdb/extension/icu/./icu-timebucket.cpp",
                 "src/duckdb/extension/icu/./icu-strptime.cpp",
-                "src/duckdb/extension/icu/./icu-extension.cpp",
-                "src/duckdb/extension/icu/./icu-dateadd.cpp",
                 "src/duckdb/extension/icu/./icu-datefunc.cpp",
+                "src/duckdb/extension/icu/./icu-extension.cpp",
+                "src/duckdb/extension/icu/./icu-makedate.cpp",
+                "src/duckdb/extension/icu/./icu-timezone.cpp",
+                "src/duckdb/extension/icu/./icu-datesub.cpp",
+                "src/duckdb/extension/icu/./icu-timebucket.cpp",
+                "src/duckdb/extension/icu/./icu-datepart.cpp",
                 "src/duckdb/ub_extension_icu_third_party_icu_common.cpp",
                 "src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp",
                 "src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp",

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.7.1-dev90.0",
+  "version": "0.7.2-dev0.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {
@@ -41,7 +41,7 @@
   },
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/cwida/duckdb.git"
+    "url": "git+https://github.com/duckdb/duckdb.git"
   },
   "ts-node": {
     "require": [
@@ -56,7 +56,7 @@
   "author": "Hannes Mühleisen",
   "license": "MPL-2.0",
   "bugs": {
-    "url": "https://github.com/cwida/duckdb/issues"
+    "url": "https://github.com/duckdb/duckdb/issues"
   },
   "homepage": "https://www.duckdb.org"
 }

package/src/duckdb/extension/json/buffered_json_reader.cpp CHANGED Viewed

@@ -25,7 +25,12 @@ JSONBufferHandle::JSONBufferHandle(idx_t buffer_index_p, idx_t readers_p, Alloca
 JSONFileHandle::JSONFileHandle(unique_ptr<FileHandle> file_handle_p, Allocator &allocator_p)
     : file_handle(std::move(file_handle_p)), allocator(allocator_p), can_seek(file_handle->CanSeek()),
       plain_file_source(file_handle->OnDiskFile() && can_seek), file_size(file_handle->GetFileSize()), read_position(0),
-      cached_size(0) {
+      requested_reads(0), actual_reads(0), cached_size(0) {
+}
+void JSONFileHandle::Close() {
+	file_handle->Close();
+	cached_buffers.clear();
 }
 idx_t JSONFileHandle::FileSize() const {
@@ -36,10 +41,6 @@ idx_t JSONFileHandle::Remaining() const {
 	return file_size - read_position;
 }
-bool JSONFileHandle::PlainFileSource() const {
-	return plain_file_source;
-}
 bool JSONFileHandle::CanSeek() const {
 	return can_seek;
 }
@@ -53,6 +54,9 @@ idx_t JSONFileHandle::GetPositionAndSize(idx_t &position, idx_t requested_size)
 	position = read_position;
 	auto actual_size = MinValue<idx_t>(requested_size, Remaining());
 	read_position += actual_size;
+	if (actual_size != 0) {
+		requested_reads++;
+	}
 	return actual_size;
 }
@@ -60,11 +64,13 @@ void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t posit
 	D_ASSERT(size != 0);
 	if (plain_file_source) {
 		file_handle->Read((void *)pointer, size, position);
+		actual_reads++;
 		return;
 	}
 	if (sample_run) { // Cache the buffer
 		file_handle->Read((void *)pointer, size, position);
+		actual_reads++;
 		cached_buffers.emplace_back(allocator.Allocate(size));
 		memcpy(cached_buffers.back().get(), pointer, size);
 		cached_size += size;
@@ -73,22 +79,24 @@ void JSONFileHandle::ReadAtPosition(const char *pointer, idx_t size, idx_t posit
 	if (!cached_buffers.empty() || position < cached_size) {
 		ReadFromCache(pointer, size, position);
+		actual_reads++;
 	}
 	if (size != 0) {
 		file_handle->Read((void *)pointer, size, position);
+		actual_reads++;
 	}
 }
 idx_t JSONFileHandle::Read(const char *pointer, idx_t requested_size, bool sample_run) {
 	D_ASSERT(requested_size != 0);
 	if (plain_file_source) {
-		auto actual_size = file_handle->Read((void *)pointer, requested_size);
+		auto actual_size = ReadInternal(pointer, requested_size);
 		read_position += actual_size;
 		return actual_size;
 	}
 	if (sample_run) { // Cache the buffer
-		auto actual_size = file_handle->Read((void *)pointer, requested_size);
+		auto actual_size = ReadInternal(pointer, requested_size);
 		if (actual_size > 0) {
 			cached_buffers.emplace_back(allocator.Allocate(actual_size));
 			memcpy(cached_buffers.back().get(), pointer, actual_size);
@@ -103,7 +111,7 @@ idx_t JSONFileHandle::Read(const char *pointer, idx_t requested_size, bool sampl
 		actual_size += ReadFromCache(pointer, requested_size, read_position);
 	}
 	if (requested_size != 0) {
-		actual_size += file_handle->Read((void *)pointer, requested_size);
+		actual_size += ReadInternal(pointer, requested_size);
 	}
 	return actual_size;
 }
@@ -111,7 +119,10 @@ idx_t JSONFileHandle::Read(const char *pointer, idx_t requested_size, bool sampl
 idx_t JSONFileHandle::ReadFromCache(const char *&pointer, idx_t &size, idx_t &position) {
 	idx_t read_size = 0;
 	idx_t total_offset = 0;
-	for (auto &cached_buffer : cached_buffers) {
+	idx_t cached_buffer_idx;
+	for (cached_buffer_idx = 0; cached_buffer_idx < cached_buffers.size(); cached_buffer_idx++) {
+		auto &cached_buffer = cached_buffers[cached_buffer_idx];
 		if (size == 0) {
 			break;
 		}
@@ -127,9 +138,23 @@ idx_t JSONFileHandle::ReadFromCache(const char *&pointer, idx_t &size, idx_t &po
 		}
 		total_offset += cached_buffer.GetSize();
 	}
 	return read_size;
 }
+idx_t JSONFileHandle::ReadInternal(const char *pointer, const idx_t requested_size) {
+	// Deal with reading from pipes
+	idx_t total_read_size = 0;
+	while (total_read_size < requested_size) {
+		auto read_size = file_handle->Read((void *)(pointer + total_read_size), requested_size - total_read_size);
+		if (read_size == 0) {
+			break;
+		}
+		total_read_size += read_size;
+	}
+	return total_read_size;
+}
 BufferedJSONReader::BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options_p, string file_path_p)
     : file_path(std::move(file_path_p)), context(context), options(std::move(options_p)), buffer_index(0) {
 }
@@ -143,6 +168,16 @@ void BufferedJSONReader::OpenJSONFile() {
 	file_handle = make_unique<JSONFileHandle>(std::move(regular_file_handle), BufferAllocator::Get(context));
 }
+void BufferedJSONReader::CloseJSONFile() {
+	while (true) {
+		lock_guard<mutex> guard(lock);
+		if (file_handle->RequestedReadsComplete()) {
+			file_handle->Close();
+			break;
+		}
+	}
+}
 bool BufferedJSONReader::IsOpen() {
 	return file_handle != nullptr;
 }
@@ -246,9 +281,15 @@ void BufferedJSONReader::Reset() {
 void JSONFileHandle::Reset() {
 	read_position = 0;
+	requested_reads = 0;
+	actual_reads = 0;
 	if (plain_file_source) {
 		file_handle->Reset();
 	}
 }
+bool JSONFileHandle::RequestedReadsComplete() {
+	return requested_reads == actual_reads;
+}
 } // namespace duckdb

package/src/duckdb/extension/json/include/buffered_json_reader.hpp CHANGED Viewed

@@ -21,7 +21,7 @@ enum class JSONFormat : uint8_t {
 	AUTO_DETECT = 0,
 	//! One object after another, newlines can be anywhere
 	UNSTRUCTURED = 1,
-	//! Objects are separated by newlines, newlines do not occur within objects (NDJSON)
+	//! Objects are separated by newlines, newlines do not occur within values (NDJSON)
 	NEWLINE_DELIMITED = 2,
 };
@@ -58,11 +58,11 @@ public:
 struct JSONFileHandle {
 public:
 	JSONFileHandle(unique_ptr<FileHandle> file_handle, Allocator &allocator);
+	void Close();
 	idx_t FileSize() const;
 	idx_t Remaining() const;
-	bool PlainFileSource() const;
 	bool CanSeek() const;
 	void Seek(idx_t position);
@@ -71,9 +71,11 @@ public:
 	idx_t Read(const char *pointer, idx_t requested_size, bool sample_run);
 	void Reset();
+	bool RequestedReadsComplete();
 private:
 	idx_t ReadFromCache(const char *&pointer, idx_t &size, idx_t &position);
+	idx_t ReadInternal(const char *pointer, const idx_t requested_size);
 private:
 	//! The JSON file handle
@@ -87,6 +89,8 @@ private:
 	//! Read properties
 	idx_t read_position;
+	idx_t requested_reads;
+	atomic<idx_t> actual_reads;
 	//! Cached buffers for resetting when reading stream
 	vector<AllocatedData> cached_buffers;
@@ -98,6 +102,7 @@ public:
 	BufferedJSONReader(ClientContext &context, BufferedJSONReaderOptions options, string file_path);
 	void OpenJSONFile();
+	void CloseJSONFile();
 	bool IsOpen();
 	BufferedJSONReaderOptions &GetOptions();

package/src/duckdb/extension/json/include/json_scan.hpp CHANGED Viewed

@@ -20,12 +20,25 @@ enum class JSONScanType : uint8_t {
 	INVALID = 0,
 	//! Read JSON straight to columnar data
 	READ_JSON = 1,
-	//! Read JSON objects as strings
+	//! Read JSON values as strings
 	READ_JSON_OBJECTS = 2,
 	//! Sample run for schema detection
 	SAMPLE = 3,
 };
+enum class JSONRecordType : uint8_t {
+	//! Sequential values
+	RECORDS = 0,
+	//! Array of values
+	ARRAY_OF_RECORDS = 1,
+	//! Sequential non-object JSON
+	JSON = 2,
+	//! Array of non-object JSON
+	ARRAY_OF_JSON = 3,
+	//! Auto-detect
+	AUTO = 4,
+};
 //! Even though LogicalTypeId is just a uint8_t, this is still needed ...
 struct LogicalTypeIdHash {
 	inline std::size_t operator()(const LogicalTypeId &id) const {
@@ -104,8 +117,8 @@ public:
 	vector<idx_t> valid_cols;
 	//! Max depth we go to detect nested JSON schema (defaults to unlimited)
 	idx_t max_depth = NumericLimits<idx_t>::Maximum();
-	//! Whether we're parsing objects (usually), or something else like arrays
-	bool objects = true;
+	//! Whether we're parsing values (usually), or something else
+	JSONRecordType record_type = JSONRecordType::RECORDS;
 	//! Forced date/timestamp formats
 	string date_format;
 	string timestamp_format;
@@ -119,12 +132,13 @@ public:
 struct JSONScanInfo : public TableFunctionInfo {
 public:
 	explicit JSONScanInfo(JSONScanType type_p = JSONScanType::INVALID, JSONFormat format_p = JSONFormat::AUTO_DETECT,
-	                      bool auto_detect_p = false)
-	    : type(type_p), format(format_p), auto_detect(auto_detect_p) {
+	                      JSONRecordType record_type_p = JSONRecordType::AUTO, bool auto_detect_p = false)
+	    : type(type_p), format(format_p), record_type(record_type_p), auto_detect(auto_detect_p) {
 	}
 	JSONScanType type;
 	JSONFormat format;
+	JSONRecordType record_type;
 	bool auto_detect;
 };
@@ -179,10 +193,15 @@ public:
 public:
 	idx_t ReadNext(JSONScanGlobalState &gstate);
 	yyjson_alc *GetAllocator();
-	void ThrowTransformError(idx_t count, idx_t object_index, const string &error_message);
+	void ThrowTransformError(idx_t object_index, const string &error_message);
+	idx_t scan_count;
 	JSONLine lines[STANDARD_VECTOR_SIZE];
-	yyjson_val *objects[STANDARD_VECTOR_SIZE];
+	yyjson_val *values[STANDARD_VECTOR_SIZE];
+	idx_t array_idx;
+	idx_t array_offset;
+	yyjson_val *array_values[STANDARD_VECTOR_SIZE];
 	idx_t batch_index;
@@ -192,6 +211,7 @@ public:
 private:
 	yyjson_val *ParseLine(char *line_start, idx_t line_size, idx_t remaining, JSONLine &line);
+	idx_t GetObjectsFromArray(JSONScanGlobalState &gstate);
 private:
 	//! Bind data
@@ -212,7 +232,7 @@ private:
 	idx_t prev_buffer_remainder;
 	idx_t lines_or_objects_in_buffer;
-	//! Buffer to reconstruct split objects
+	//! Buffer to reconstruct split values
 	AllocatedData reconstruct_buffer;
 	//! Copy of current buffer for YYJSON_READ_INSITU
 	AllocatedData current_buffer_copy;
@@ -276,6 +296,21 @@ public:
 		return lstate.GetBatchIndex();
 	}
+	static unique_ptr<NodeStatistics> JSONScanCardinality(ClientContext &context, const FunctionData *bind_data) {
+		auto &data = (JSONScanData &)*bind_data;
+		idx_t per_file_cardinality;
+		if (data.stored_readers.empty()) {
+			// The cardinality of an unknown JSON file is the almighty number 42 except when it's not
+			per_file_cardinality = 42;
+		} else {
+			// If we multiply the almighty number 42 by 10, we get the exact average size of a JSON
+			// Not really, but the average size of a lineitem row in JSON is around 360 bytes
+			per_file_cardinality = data.stored_readers[0]->GetFileHandle().FileSize() / 420;
+		}
+		// Obviously this can be improved but this is better than defaulting to 0
+		return make_unique<NodeStatistics>(per_file_cardinality * data.file_paths.size());
+	}
 	static void JSONScanSerialize(FieldWriter &writer, const FunctionData *bind_data_p, const TableFunction &function) {
 		auto &bind_data = (JSONScanData &)*bind_data_p;
 		bind_data.Serialize(writer);
@@ -291,16 +326,16 @@ public:
 	static void TableFunctionDefaults(TableFunction &table_function) {
 		table_function.named_parameters["maximum_object_size"] = LogicalType::UINTEGER;
 		table_function.named_parameters["ignore_errors"] = LogicalType::BOOLEAN;
-		table_function.named_parameters["format"] = LogicalType::VARCHAR;
+		table_function.named_parameters["lines"] = LogicalType::VARCHAR;
 		table_function.named_parameters["compression"] = LogicalType::VARCHAR;
 		table_function.table_scan_progress = JSONScanProgress;
 		table_function.get_batch_index = JSONScanGetBatchIndex;
+		table_function.cardinality = JSONScanCardinality;
 		table_function.serialize = JSONScanSerialize;
 		table_function.deserialize = JSONScanDeserialize;
-		// TODO: might be able to do some of these
 		table_function.projection_pushdown = false;
 		table_function.filter_pushdown = false;
 		table_function.filter_prune = false;

package/src/duckdb/extension/json/json_functions/copy_json.cpp CHANGED Viewed

@@ -1,7 +1,9 @@
 #include "duckdb/function/copy_function.hpp"
 #include "duckdb/parser/expression/constant_expression.hpp"
 #include "duckdb/parser/expression/function_expression.hpp"
+#include "duckdb/parser/expression/positional_reference_expression.hpp"
 #include "duckdb/parser/query_node/select_node.hpp"
+#include "duckdb/parser/tableref/subqueryref.hpp"
 #include "duckdb/planner/binder.hpp"
 #include "json_functions.hpp"
 #include "json_scan.hpp"
@@ -12,42 +14,52 @@ namespace duckdb {
 static BoundStatement CopyToJSONPlan(Binder &binder, CopyStatement &stmt) {
 	auto stmt_copy = stmt.Copy();
 	auto &copy = (CopyStatement &)*stmt_copy;
-	auto &select_stmt = (SelectNode &)*copy.select_statement;
 	auto &info = *copy.info;
-	// strftime if the user specified a format TODO: deal with date/timestamp within nested types
-	auto date_it = info.options.find("dateformat");
-	auto timestamp_it = info.options.find("timestampformat");
 	// Bind the select statement of the original to resolve the types
 	auto dummy_binder = Binder::CreateBinder(binder.context, &binder, true);
 	auto bound_original = dummy_binder->Bind(*stmt.select_statement);
-	D_ASSERT(bound_original.types.size() == select_stmt.select_list.size());
-	const idx_t num_cols = bound_original.types.size();
-	// This loop also makes sure the columns have an alias (needed for struct_pack)
+	// Create new SelectNode with the original SelectNode as a subquery in the FROM clause
+	auto select_stmt = make_unique<SelectStatement>();
+	select_stmt->node = std::move(copy.select_statement);
+	auto subquery_ref = make_unique<SubqueryRef>(std::move(select_stmt));
+	copy.select_statement = make_unique_base<QueryNode, SelectNode>();
+	auto &new_select_node = (SelectNode &)*copy.select_statement;
+	new_select_node.from_table = std::move(subquery_ref);
+	// Create new select list
+	vector<unique_ptr<ParsedExpression>> select_list;
+	select_list.reserve(bound_original.types.size());
+	// strftime if the user specified a format (loop also gives columns a name, needed for struct_pack)
+	// TODO: deal with date/timestamp within nested types
+	const auto date_it = info.options.find("dateformat");
+	const auto timestamp_it = info.options.find("timestampformat");
 	vector<unique_ptr<ParsedExpression>> strftime_children;
-	for (idx_t i = 0; i < num_cols; i++) {
+	for (idx_t col_idx = 0; col_idx < bound_original.types.size(); col_idx++) {
+		auto column = make_unique_base<ParsedExpression, PositionalReferenceExpression>(col_idx + 1);
 		strftime_children.clear();
-		auto &col = select_stmt.select_list[i];
-		auto name = col->GetName();
-		if (bound_original.types[i] == LogicalTypeId::DATE && date_it != info.options.end()) {
-			strftime_children.emplace_back(std::move(col));
+		const auto &type = bound_original.types[col_idx];
+		const auto &name = bound_original.names[col_idx];
+		if (date_it != info.options.end() && type == LogicalTypeId::DATE) {
+			strftime_children.emplace_back(std::move(column));
 			strftime_children.emplace_back(make_unique<ConstantExpression>(date_it->second.back()));
-			col = make_unique<FunctionExpression>("strftime", std::move(strftime_children));
-		} else if (bound_original.types[i] == LogicalTypeId::TIMESTAMP && timestamp_it != info.options.end()) {
-			strftime_children.emplace_back(std::move(col));
+			column = make_unique<FunctionExpression>("strftime", std::move(strftime_children));
+		} else if (timestamp_it != info.options.end() && type == LogicalTypeId::TIMESTAMP) {
+			strftime_children.emplace_back(std::move(column));
 			strftime_children.emplace_back(make_unique<ConstantExpression>(timestamp_it->second.back()));
-			col = make_unique<FunctionExpression>("strftime", std::move(strftime_children));
+			column = make_unique<FunctionExpression>("strftime", std::move(strftime_children));
 		}
-		col->alias = name;
+		column->alias = name;
+		select_list.emplace_back(std::move(column));
 	}
 	// Now create the struct_pack/to_json to create a JSON object per row
+	auto &select_node = (SelectNode &)*copy.select_statement;
 	vector<unique_ptr<ParsedExpression>> struct_pack_child;
-	struct_pack_child.emplace_back(make_unique<FunctionExpression>("struct_pack", std::move(select_stmt.select_list)));
-	select_stmt.select_list.clear();
-	select_stmt.select_list.emplace_back(make_unique<FunctionExpression>("to_json", std::move(struct_pack_child)));
+	struct_pack_child.emplace_back(make_unique<FunctionExpression>("struct_pack", std::move(select_list)));
+	select_node.select_list.emplace_back(make_unique<FunctionExpression>("to_json", std::move(struct_pack_child)));
 	// Now we can just use the CSV writer
 	info.format = "csv";
@@ -101,7 +113,8 @@ CreateCopyFunctionInfo JSONFunctions::GetJSONCopyFunction() {
 	function.copy_from_bind = CopyFromJSONBind;
 	function.copy_from_function = JSONFunctions::GetReadJSONTableFunction(
-	    false, make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, false));
+	    false,
+	    make_shared<JSONScanInfo>(JSONScanType::READ_JSON, JSONFormat::AUTO_DETECT, JSONRecordType::RECORDS, false));
 	return CreateCopyFunctionInfo(function);
 }

package/src/duckdb/extension/json/json_functions/json_create.cpp CHANGED Viewed

@@ -56,7 +56,7 @@ static LogicalType GetJSONType(unordered_map<string, unique_ptr<Vector>> &const_
 	// The nested types need to conform as well
 	case LogicalTypeId::LIST:
 		return LogicalType::LIST(GetJSONType(const_struct_names, ListType::GetChildType(type)));
-	// Struct and MAP are treated as JSON objects
+	// Struct and MAP are treated as JSON values
 	case LogicalTypeId::STRUCT: {
 		child_list_t<LogicalType> child_types;
 		for (const auto &child_type : StructType::GetChildTypes(type)) {
@@ -247,14 +247,14 @@ static void TemplatedCreateValues(yyjson_mut_doc *doc, yyjson_mut_val *vals[], V
 static void CreateValuesStruct(const JSONCreateFunctionData &info, yyjson_mut_doc *doc, yyjson_mut_val *vals[],
                                Vector &value_v, idx_t count) {
-	// Structs become objects, therefore we initialize vals to JSON objects
+	// Structs become values, therefore we initialize vals to JSON values
 	for (idx_t i = 0; i < count; i++) {
 		vals[i] = yyjson_mut_obj(doc);
 	}
 	// Initialize re-usable array for the nested values
 	auto nested_vals = (yyjson_mut_val **)doc->alc.malloc(doc->alc.ctx, sizeof(yyjson_mut_val *) * count);
-	// Add the key/value pairs to the objects
+	// Add the key/value pairs to the values
 	auto &entries = StructVector::GetEntries(value_v);
 	for (idx_t entry_i = 0; entry_i < entries.size(); entry_i++) {
 		auto &struct_key_v = *info.const_struct_names.at(StructType::GetChildName(value_v.GetType(), entry_i));
@@ -284,7 +284,7 @@ static void CreateValuesMap(const JSONCreateFunctionData &info, yyjson_mut_doc *
 	auto map_val_count = ListVector::GetListSize(value_v);
 	auto nested_vals = (yyjson_mut_val **)doc->alc.malloc(doc->alc.ctx, sizeof(yyjson_mut_val *) * map_val_count);
 	CreateValues(info, doc, nested_vals, map_val_v, map_val_count);
-	// Add the key/value pairs to the objects
+	// Add the key/value pairs to the values
 	UnifiedVectorFormat map_data;
 	value_v.ToUnifiedFormat(count, map_data);
 	auto map_key_list_entries = (list_entry_t *)map_data.data;
@@ -308,7 +308,7 @@ static void CreateValuesMap(const JSONCreateFunctionData &info, yyjson_mut_doc *
 static void CreateValuesUnion(const JSONCreateFunctionData &info, yyjson_mut_doc *doc, yyjson_mut_val *vals[],
                               Vector &value_v, idx_t count) {
-	// Structs become objects, therefore we initialize vals to JSON objects
+	// Structs become values, therefore we initialize vals to JSON values
 	for (idx_t i = 0; i < count; i++) {
 		vals[i] = yyjson_mut_obj(doc);
 	}
@@ -320,7 +320,7 @@ static void CreateValuesUnion(const JSONCreateFunctionData &info, yyjson_mut_doc
 	UnifiedVectorFormat tag_data;
 	tag_v.ToUnifiedFormat(count, tag_data);
-	// Add the key/value pairs to the objects
+	// Add the key/value pairs to the values
 	for (idx_t member_idx = 0; member_idx < UnionType::GetMemberCount(value_v.GetType()); member_idx++) {
 		auto &member_val_v = UnionVector::GetMember(value_v, member_idx);
 		auto &member_key_v = *info.const_struct_names.at(UnionType::GetMemberName(value_v.GetType(), member_idx));
@@ -425,7 +425,7 @@ static void ObjectFunction(DataChunk &args, ExpressionState &state, Vector &resu
 	auto &lstate = JSONFunctionLocalState::ResetAndGet(state);
 	auto alc = lstate.json_allocator.GetYYJSONAllocator();
-	// Initialize objects
+	// Initialize values
 	const idx_t count = args.size();
 	auto doc = JSONCommon::CreateDocument(alc);
 	yyjson_mut_val *objs[STANDARD_VECTOR_SIZE];
@@ -440,7 +440,7 @@ static void ObjectFunction(DataChunk &args, ExpressionState &state, Vector &resu
 		Vector &value_v = args.data[pair_idx * 2 + 1];
 		CreateKeyValuePairs(info, doc, objs, vals, key_v, value_v, count);
 	}
-	// Write JSON objects to string
+	// Write JSON values to string
 	auto objects = FlatVector::GetData<string_t>(result);
 	for (idx_t i = 0; i < count; i++) {
 		objects[i] = JSONCommon::WriteVal<yyjson_mut_val>(objs[i], alc);

package/src/duckdb/extension/json/json_functions/json_structure.cpp CHANGED Viewed

@@ -214,9 +214,6 @@ void JSONStructureNode::RefineCandidateTypesObject(yyjson_val *vals[], idx_t cou
 		}
 	}
-	if (count > STANDARD_VECTOR_SIZE) {
-		string_vector.Initialize(false, count);
-	}
 	for (idx_t child_idx = 0; child_idx < child_count; child_idx++) {
 		desc.children[child_idx].RefineCandidateTypes(child_vals[child_idx], count, string_vector, allocator,
 		                                              date_format_map);
@@ -431,6 +428,10 @@ static inline yyjson_mut_val *ConvertStructureArray(const JSONStructureNode &nod
 static inline yyjson_mut_val *ConvertStructureObject(const JSONStructureNode &node, yyjson_mut_doc *doc) {
 	D_ASSERT(node.descriptions.size() == 1 && node.descriptions[0].type == LogicalTypeId::STRUCT);
 	auto &desc = node.descriptions[0];
+	if (desc.children.empty()) {
+		// Empty struct - let's do JSON instead
+		return yyjson_mut_str(doc, JSONCommon::JSON_TYPE_NAME);
+	}
 	auto obj = yyjson_mut_obj(doc);
 	for (auto &child : desc.children) {
@@ -495,6 +496,10 @@ static LogicalType StructureToTypeObject(ClientContext &context, const JSONStruc
                                          idx_t depth) {
 	D_ASSERT(node.descriptions.size() == 1 && node.descriptions[0].type == LogicalTypeId::STRUCT);
 	auto &desc = node.descriptions[0];
+	if (desc.children.empty()) {
+		// Empty struct - let's do JSON instead
+		return JSONCommon::JSONType();
+	}
 	child_list_t<LogicalType> child_types;
 	child_types.reserve(desc.children.size());