duckdb 0.9.2-dev9.0 → 0.9.2
- package/.github/workflows/HighPriorityIssues.yml +36 -0
- package/.github/workflows/NodeJS.yml +18 -26
- package/README.md +2 -2
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-timebucket.cpp +7 -10
- package/src/duckdb/extension/icu/icu-timezone.cpp +3 -0
- package/src/duckdb/extension/json/buffered_json_reader.cpp +11 -18
- package/src/duckdb/extension/json/json_scan.cpp +10 -5
- package/src/duckdb/extension/parquet/parquet_extension.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +6 -12
- package/src/duckdb/src/catalog/catalog_set.cpp +3 -4
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +17 -8
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +5 -1
- package/src/duckdb/src/common/enum_util.cpp +67 -0
- package/src/duckdb/src/common/file_system.cpp +5 -1
- package/src/duckdb/src/common/hive_partitioning.cpp +6 -3
- package/src/duckdb/src/common/multi_file_reader.cpp +4 -2
- package/src/duckdb/src/common/types/list_segment.cpp +4 -0
- package/src/duckdb/src/common/types/vector.cpp +66 -34
- package/src/duckdb/src/common/types.cpp +3 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +84 -25
- package/src/duckdb/src/core_functions/function_list.cpp +2 -1
- package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +8 -1
- package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +23 -0
- package/src/duckdb/src/core_functions/scalar/string/jaccard.cpp +16 -23
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +27 -18
- package/src/duckdb/src/execution/index/art/art_key.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +5 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +4 -3
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +25 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +5 -2
- package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +0 -1
- package/src/duckdb/src/execution/physical_operator.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +5 -0
- package/src/duckdb/src/execution/window_executor.cpp +13 -1
- package/src/duckdb/src/function/cast/union/from_struct.cpp +24 -7
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +1 -1
- package/src/duckdb/src/function/function_set.cpp +1 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/scalar/string/concat.cpp +4 -1
- package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +32 -0
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +46 -2
- package/src/duckdb/src/function/table/arrow.cpp +19 -17
- package/src/duckdb/src/function/table/arrow_conversion.cpp +67 -31
- package/src/duckdb/src/function/table/copy_csv.cpp +3 -3
- package/src/duckdb/src/function/table/system/pragma_user_agent.cpp +50 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +8 -2
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +74 -4
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +82 -3
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +19 -9
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +6 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/pipe_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +14 -2
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +6 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_enum.hpp +21 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +6 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +6 -0
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +3 -1
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +37 -2
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/udf_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +5 -4
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +12 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +18 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +0 -2
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +8 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/statement/create_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +5 -7
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -1
- package/src/duckdb/src/include/duckdb.h +1 -1
- package/src/duckdb/src/main/capi/config-c.cpp +1 -0
- package/src/duckdb/src/main/capi/duckdb-c.cpp +9 -1
- package/src/duckdb/src/main/config.cpp +18 -0
- package/src/duckdb/src/main/database.cpp +1 -0
- package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +5 -4
- package/src/duckdb/src/main/settings/settings.cpp +49 -0
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +0 -8
- package/src/duckdb/src/optimizer/filter_combiner.cpp +37 -23
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +7 -4
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +5 -4
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +15 -4
- package/src/duckdb/src/parallel/pipeline_executor.cpp +81 -40
- package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +27 -0
- package/src/duckdb/src/parser/statement/create_statement.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +16 -3
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +7 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +3 -2
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +3 -0
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +76 -2
- package/src/duckdb/src/storage/data_table.cpp +7 -1
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +14 -0
- package/src/duckdb/src/storage/storage_info.cpp +2 -1
- package/src/duckdb/src/storage/table/row_version_manager.cpp +5 -3
- package/src/duckdb/src/transaction/commit_state.cpp +1 -0
- package/src/duckdb/third_party/parquet/parquet_types.cpp +224 -221
- package/src/duckdb/third_party/parquet/parquet_types.h +0 -14
- package/src/duckdb/ub_src_common_arrow_appender.cpp +0 -4
- package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/test/columns.test.ts +1 -1
package/.github/workflows/HighPriorityIssues.yml
ADDED
@@ -0,0 +1,36 @@
+name: Create Internal issue when the "High Priority" label is applied
+on:
+  issues:
+    types:
+      - labeled
+
+env:
+  GH_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }}
+  # an event triggering this workflow is either an issue or a pull request,
+  # hence only one of the numbers will be filled in the TITLE_PREFIX
+  TITLE_PREFIX: "[duckdb-node/#${{ github.event.issue.number }}]"
+  PUBLIC_ISSUE_TITLE: ${{ github.event.issue.title }}
+
+jobs:
+  create_or_label_issue:
+    if: github.event.label.name == 'High Priority'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get mirror issue number
+        run: |
+          gh issue list --repo duckdblabs/duckdb-internal --search "${TITLE_PREFIX}" --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
+          echo "MIRROR_ISSUE_NUMBER=$(cat mirror_issue_number.txt)" >> $GITHUB_ENV
+
+      - name: Print whether mirror issue exists
+        run: |
+          if [ "$MIRROR_ISSUE_NUMBER" == "" ]; then
+            echo "Mirror issue with title prefix '$TITLE_PREFIX' does not exist yet"
+          else
+            echo "Mirror issue with title prefix '$TITLE_PREFIX' exists with number $MIRROR_ISSUE_NUMBER"
+          fi
+
+      - name: Create or label issue
+        run: |
+          if [ "$MIRROR_ISSUE_NUMBER" == "" ]; then
+            gh issue create --repo duckdblabs/duckdb-internal --label "Node.js" --label "High Priority" --title "$TITLE_PREFIX - $PUBLIC_ISSUE_TITLE" --body "See https://github.com/duckdb/duckdb-node/issues/${{ github.event.issue.number }}"
+          fi
package/.github/workflows/NodeJS.yml
CHANGED
@@ -2,6 +2,8 @@ name: NodeJS
 on:
   push:
   pull_request:
+  workflow_dispatch:
+  repository_dispatch:
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || '' }}-${{ github.base_ref || '' }}-${{ github.ref != 'refs/heads/main' || github.sha }}
@@ -34,55 +36,36 @@ jobs:
     name: node.js Linux
     runs-on: ubuntu-20.04
     needs: set-up-npm
+    continue-on-error: ${{ matrix.node != '18' && matrix.node != '20' && matrix.node != '21' }}
     env:
       TARGET_ARCH: ${{ matrix.target_arch }}
       DUCKDB_NODE_BUILD_CACHE: 0
     strategy:
       matrix:
         # node.js current support policy to be found at https://github.com/duckdb/duckdb-node/tree/main/#Supported-Node-versions
-        node: [ '12', '14', '16', '17', '18', '19', '20' ]
+        node: [ '12', '14', '16', '17', '18', '19', '20', '21']
         target_arch: [ x64, arm64 ]
         isRelease:
           - ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
         exclude:
           - isRelease: false
             node: 12
-            target_arch: x64
           - isRelease: false
             node: 14
-            target_arch: x64
           - isRelease: false
             node: 16
-            target_arch: x64
           - isRelease: false
             node: 17
-            target_arch: x64
-          - isRelease: false
-            node: 18
-            target_arch: x64
           - isRelease: false
             node: 19
-            target_arch: x64
-          - isRelease: false
-            node: 12
-            target_arch: arm64
-          - isRelease: false
-            node: 14
-            target_arch: arm64
-          - isRelease: false
-            node: 16
-            target_arch: arm64
-          - isRelease: false
-            node: 17
-            target_arch: arm64
           - isRelease: false
             node: 18
             target_arch: arm64
           - isRelease: false
-            node: 19
+            node: 20
             target_arch: arm64
           - isRelease: false
-            node: 20
+            node: 21
             target_arch: arm64
 
     steps:
@@ -127,10 +110,11 @@ jobs:
     name: node.js OSX
     runs-on: macos-latest
     needs: linux-nodejs
+    continue-on-error: ${{ matrix.node != '18' && matrix.node != '20' && matrix.node != '21' }}
     strategy:
       matrix:
         target_arch: [ x64, arm64 ]
-        node: [ '12', '14', '16', '17', '18', '19', '20' ]
+        node: [ '12', '14', '16', '17', '18', '19', '20', '21']
         isRelease:
           - ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
         exclude:
@@ -143,7 +127,7 @@ jobs:
           - isRelease: false
             node: 17
           - isRelease: false
-            node:
+            node: 19
           - target_arch: arm64
             node: 12
           - target_arch: arm64
@@ -158,6 +142,11 @@ jobs:
         with:
           fetch-depth: 0
 
+      # Default Python (3.12) doesn't have support for distutils
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+
       - name: Setup Ccache
         uses: hendrikmuhs/ccache-action@main
         with:
@@ -184,12 +173,13 @@ jobs:
     name: node.js Windows
     runs-on: windows-latest
     needs: linux-nodejs
+    continue-on-error: ${{ matrix.node != '18' && matrix.node != '20' && matrix.node != '21' }}
     env:
       npm_config_msvs_version: 2019
 
     strategy:
       matrix:
-        node: [ '12', '14', '16', '17', '18', '19', '20' ]
+        node: [ '12', '14', '16', '17', '18', '19', '20', '21']
         isRelease:
           - ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
         exclude:
@@ -205,6 +195,8 @@ jobs:
             node: 18
           - isRelease: false
             node: 19
+          - isRelease: false
+            node: 20
 
     steps:
       - uses: actions/setup-python@v4
package/README.md
CHANGED
@@ -101,10 +101,10 @@ var stmt = con.prepare('select ?::INTEGER as fortytwo', function(err, stmt) {
 ```
 
 ## Supported Node versions
-We actively support only LTS and In-Support Node versions, as per July 2023, they are: Node
+We actively support only LTS and In-Support Node versions, as per July 2023, they are: Node 18, Node 20 and Node 21.
 Release schedule for Node.js can be checked here: https://github.com/nodejs/release#release-schedule.
 
-We currently bundle and test DuckDB also for Node 10, 12, 14, 17 and 19. We plan of going so going forward as long as the tooling supports it.
+We currently bundle and test DuckDB also for Node 10, 12, 14, 16, 17 and 19. We plan of going so going forward as long as the tooling supports it.
 As per July 2023, Node 15 has been removed from the supported versions.
 
 ## Development
package/package.json
CHANGED
package/src/duckdb/extension/icu/icu-timebucket.cpp
CHANGED
@@ -76,24 +76,21 @@ struct ICUTimeBucket : public ICUDateFunc {
 
 	static inline timestamp_t WidthConvertibleToDaysCommon(int32_t bucket_width_days, const timestamp_t ts,
 	                                                        const timestamp_t origin, icu::Calendar *calendar) {
-		const auto trunc_days = TruncationFactory(DatePartSpecifier::DAY);
 		const auto sub_days = SubtractFactory(DatePartSpecifier::DAY);
 
-
-		trunc_days(calendar, tmp_micros);
-		timestamp_t truncated_ts = GetTimeUnsafe(calendar, tmp_micros);
-
-		int64_t ts_days = sub_days(calendar, origin, truncated_ts);
+		int64_t ts_days = sub_days(calendar, origin, ts);
 		int64_t result_days = (ts_days / bucket_width_days) * bucket_width_days;
 		if (result_days < NumericLimits<int32_t>::Minimum() || result_days > NumericLimits<int32_t>::Maximum()) {
 			throw OutOfRangeException("Timestamp out of range");
 		}
-
-
-
+		timestamp_t bucket = Add(calendar, origin, interval_t {0, static_cast<int32_t>(result_days), 0});
+		if (ts < bucket) {
+			D_ASSERT(ts < origin);
+			bucket = Add(calendar, bucket, interval_t {0, -bucket_width_days, 0});
+			D_ASSERT(ts > bucket);
 		}
 
-		return
+		return bucket;
 	}
 
 	static inline timestamp_t WidthConvertibleToMonthsCommon(int32_t bucket_width_months, const timestamp_t ts,
package/src/duckdb/extension/icu/icu-timezone.cpp
CHANGED
@@ -81,6 +81,9 @@ static void ICUTimeZoneFunction(ClientContext &context, TableFunctionInput &data
 			break;
 		}
 
+		// What PG reports is the total offset for today,
+		// which is the ICU total offset (i.e., "raw") plus the DST offset.
+		raw_offset_ms += dst_offset_ms;
 		output.SetValue(2, index, Value::INTERVAL(Interval::FromMicro(raw_offset_ms * Interval::MICROS_PER_MSEC)));
 		output.SetValue(3, index, Value(dst_offset_ms != 0));
 		++index;
package/src/duckdb/extension/json/buffered_json_reader.cpp
CHANGED
@@ -23,7 +23,7 @@ bool JSONFileHandle::IsOpen() const {
 }
 
 void JSONFileHandle::Close() {
-	if (IsOpen() && file_handle->
+	if (IsOpen() && !file_handle->IsPipe()) {
 		file_handle->Close();
 		file_handle = nullptr;
 	}
@@ -72,30 +72,23 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
 	D_ASSERT(size != 0);
 	if (plain_file_source) {
 		file_handle->Read(pointer, size, position);
-
-
-		return;
-	}
-
-	if (sample_run) { // Cache the buffer
+	} else if (sample_run) { // Cache the buffer
 		file_handle->Read(pointer, size, position);
-		actual_reads++;
 
 		cached_buffers.emplace_back(allocator.Allocate(size));
 		memcpy(cached_buffers.back().get(), pointer, size);
 		cached_size += size;
+	} else {
+		if (!cached_buffers.empty() || position < cached_size) {
+			ReadFromCache(pointer, size, position);
+		}
 
-
-
-
-	if (!cached_buffers.empty() || position < cached_size) {
-		ReadFromCache(pointer, size, position);
-		actual_reads++;
+		if (size != 0) {
+			file_handle->Read(pointer, size, position);
+		}
 	}
-
-
-	file_handle->Read(pointer, size, position);
-	actual_reads++;
+	if (++actual_reads > requested_reads) {
+		throw InternalException("JSONFileHandle performed more actual reads than requested reads");
 	}
 }
 
package/src/duckdb/extension/json/json_scan.cpp
CHANGED
@@ -214,17 +214,22 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
 
 idx_t JSONGlobalTableFunctionState::MaxThreads() const {
 	auto &bind_data = state.bind_data;
-	if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
-		return state.system_threads;
-	}
 
 	if (!state.json_readers.empty() && state.json_readers[0]->HasFileHandle()) {
+		// We opened and auto-detected a file, so we can get a better estimate
 		auto &reader = *state.json_readers[0];
-		if (
-
+		if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED ||
+		    reader.GetFormat() == JSONFormat::NEWLINE_DELIMITED) {
+			return MaxValue<idx_t>(state.json_readers[0]->GetFileHandle().FileSize() / bind_data.maximum_object_size,
+			                       1);
 		}
 	}
 
+	if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
+		// We haven't opened any files, so this is our best bet
+		return state.system_threads;
+	}
+
 	// One reader per file
 	return bind_data.files.size();
 }
package/src/duckdb/extension/parquet/parquet_extension.cpp
CHANGED
@@ -740,8 +740,8 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
 	}
 }
 
-unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
-                                          vector<LogicalType> &sql_types) {
+unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
+                                          const vector<LogicalType> &sql_types) {
 	D_ASSERT(names.size() == sql_types.size());
 	bool row_group_size_bytes_set = false;
 	auto bind_data = make_uniq<ParquetWriteBindData>();
package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp
CHANGED
@@ -32,6 +32,7 @@ unique_ptr<CreateInfo> ViewCatalogEntry::GetInfo() const {
 	result->query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
 	result->aliases = aliases;
 	result->types = types;
+	result->temporary = temporary;
 	return std::move(result);
 }
 
@@ -58,23 +59,16 @@ string ViewCatalogEntry::ToSQL() const {
 		//! Return empty sql with view name so pragma view_tables don't complain
 		return sql;
 	}
-
+	auto info = GetInfo();
+	auto result = info->ToString();
+	return result + ";\n";
 }
 
 unique_ptr<CatalogEntry> ViewCatalogEntry::Copy(ClientContext &context) const {
 	D_ASSERT(!internal);
-
-	create_info.query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
-	for (idx_t i = 0; i < aliases.size(); i++) {
-		create_info.aliases.push_back(aliases[i]);
-	}
-	for (idx_t i = 0; i < types.size(); i++) {
-		create_info.types.push_back(types[i]);
-	}
-	create_info.temporary = temporary;
-	create_info.sql = sql;
+	auto create_info = GetInfo();
 
-	return make_uniq<ViewCatalogEntry>(catalog, schema, create_info);
+	return make_uniq<ViewCatalogEntry>(catalog, schema, create_info->Cast<CreateViewInfo>());
 }
 
 } // namespace duckdb
package/src/duckdb/src/catalog/catalog_set.cpp
CHANGED
@@ -199,6 +199,8 @@ bool CatalogSet::AlterOwnership(CatalogTransaction transaction, ChangeOwnershipI
 bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name, AlterInfo &alter_info) {
 	// lock the catalog for writing
 	lock_guard<mutex> write_lock(catalog.GetWriteLock());
+	// lock this catalog set to disallow reading
+	lock_guard<mutex> read_lock(catalog_lock);
 
 	// first check if the entry exists in the unordered set
 	EntryIndex entry_index;
@@ -210,9 +212,6 @@ bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name,
 		throw CatalogException("Cannot alter entry \"%s\" because it is an internal system entry", entry->name);
 	}
 
-	// lock this catalog set to disallow reading
-	lock_guard<mutex> read_lock(catalog_lock);
-
 	// create a new entry and replace the currently stored one
 	// set the timestamp to the timestamp of the current transaction
 	// and point it to the updated table node
@@ -316,6 +315,7 @@ void CatalogSet::DropEntryInternal(CatalogTransaction transaction, EntryIndex en
 bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, bool cascade, bool allow_drop_internal) {
 	// lock the catalog for writing
 	lock_guard<mutex> write_lock(catalog.GetWriteLock());
+	lock_guard<mutex> read_lock(catalog_lock);
 	// we can only delete an entry that exists
 	EntryIndex entry_index;
 	auto entry = GetEntryInternal(transaction, name, &entry_index);
@@ -326,7 +326,6 @@ bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, b
 		throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name);
 	}
 
-	lock_guard<mutex> read_lock(catalog_lock);
 	DropEntryInternal(transaction, std::move(entry_index), *entry, cascade);
 	return true;
 }
package/src/duckdb/src/common/arrow/appender/union_data.cpp
CHANGED
@@ -24,7 +24,7 @@ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t f
 
 	duckdb::vector<Vector> child_vectors;
 	for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
-		child_vectors.emplace_back(child.second);
+		child_vectors.emplace_back(child.second, size);
 	}
 
 	for (idx_t input_idx = from; input_idx < to; input_idx++) {
package/src/duckdb/src/common/arrow/arrow_appender.cpp
CHANGED
@@ -193,26 +193,26 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
 		if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
 			InitializeAppenderForType<ArrowVarcharData<string_t>>(append_data);
 		} else {
-			InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter,
+			InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter, int32_t>>(append_data);
 		}
 		break;
 	case LogicalTypeId::UUID:
 		if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
 			InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter>>(append_data);
 		} else {
-			InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter,
+			InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter, int32_t>>(append_data);
 		}
 		break;
 	case LogicalTypeId::ENUM:
 		switch (type.InternalType()) {
 		case PhysicalType::UINT8:
-			InitializeAppenderForType<ArrowEnumData<
+			InitializeAppenderForType<ArrowEnumData<int8_t>>(append_data);
 			break;
 		case PhysicalType::UINT16:
-			InitializeAppenderForType<ArrowEnumData<
+			InitializeAppenderForType<ArrowEnumData<int16_t>>(append_data);
 			break;
 		case PhysicalType::UINT32:
-			InitializeAppenderForType<ArrowEnumData<
+			InitializeAppenderForType<ArrowEnumData<int32_t>>(append_data);
 			break;
 		default:
 			throw InternalException("Unsupported internal enum type");
@@ -227,11 +227,20 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
 	case LogicalTypeId::STRUCT:
 		InitializeAppenderForType<ArrowStructData>(append_data);
 		break;
-	case LogicalTypeId::LIST:
-
+	case LogicalTypeId::LIST: {
+		if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
+			InitializeAppenderForType<ArrowListData<int64_t>>(append_data);
+		} else {
+			InitializeAppenderForType<ArrowListData<int32_t>>(append_data);
+		}
 		break;
+	}
 	case LogicalTypeId::MAP:
-
+		if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
+			InitializeAppenderForType<ArrowMapData<int64_t>>(append_data);
+		} else {
+			InitializeAppenderForType<ArrowMapData<int32_t>>(append_data);
+		}
 		break;
 	default:
 		throw NotImplementedException("Unsupported type in DuckDB -> Arrow Conversion: %s\n", type.ToString());
package/src/duckdb/src/common/arrow/arrow_converter.cpp
CHANGED
@@ -187,7 +187,11 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
 		break;
 	}
 	case LogicalTypeId::LIST: {
-
+		if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
+			child.format = "+L";
+		} else {
+			child.format = "+l";
+		}
 		child.n_children = 1;
 		root_holder.nested_children.emplace_back();
 		root_holder.nested_children.back().resize(1);
package/src/duckdb/src/common/enum_util.cpp
CHANGED
@@ -64,6 +64,7 @@
 #include "duckdb/common/types/timestamp.hpp"
 #include "duckdb/common/types/vector.hpp"
 #include "duckdb/common/types/vector_buffer.hpp"
+#include "duckdb/core_functions/aggregate/quantile_enum.hpp"
 #include "duckdb/execution/index/art/art.hpp"
 #include "duckdb/execution/index/art/node.hpp"
 #include "duckdb/execution/operator/scan/csv/base_csv_reader.hpp"
@@ -4571,6 +4572,44 @@ ProfilerPrintFormat EnumUtil::FromString<ProfilerPrintFormat>(const char *value)
 	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }
 
+template<>
+const char* EnumUtil::ToChars<QuantileSerializationType>(QuantileSerializationType value) {
+	switch(value) {
+	case QuantileSerializationType::NON_DECIMAL:
+		return "NON_DECIMAL";
+	case QuantileSerializationType::DECIMAL_DISCRETE:
+		return "DECIMAL_DISCRETE";
+	case QuantileSerializationType::DECIMAL_DISCRETE_LIST:
+		return "DECIMAL_DISCRETE_LIST";
+	case QuantileSerializationType::DECIMAL_CONTINUOUS:
+		return "DECIMAL_CONTINUOUS";
+	case QuantileSerializationType::DECIMAL_CONTINUOUS_LIST:
+		return "DECIMAL_CONTINUOUS_LIST";
+	default:
+		throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
+	}
+}
+
+template<>
+QuantileSerializationType EnumUtil::FromString<QuantileSerializationType>(const char *value) {
+	if (StringUtil::Equals(value, "NON_DECIMAL")) {
+		return QuantileSerializationType::NON_DECIMAL;
+	}
+	if (StringUtil::Equals(value, "DECIMAL_DISCRETE")) {
+		return QuantileSerializationType::DECIMAL_DISCRETE;
+	}
+	if (StringUtil::Equals(value, "DECIMAL_DISCRETE_LIST")) {
+		return QuantileSerializationType::DECIMAL_DISCRETE_LIST;
+	}
+	if (StringUtil::Equals(value, "DECIMAL_CONTINUOUS")) {
+		return QuantileSerializationType::DECIMAL_CONTINUOUS;
+	}
+	if (StringUtil::Equals(value, "DECIMAL_CONTINUOUS_LIST")) {
+		return QuantileSerializationType::DECIMAL_CONTINUOUS_LIST;
+	}
+	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+}
+
 template<>
 const char* EnumUtil::ToChars<QueryNodeType>(QueryNodeType value) {
 	switch(value) {
@@ -5118,6 +5157,29 @@ SinkFinalizeType EnumUtil::FromString<SinkFinalizeType>(const char *value) {
 	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }
 
+template<>
+const char* EnumUtil::ToChars<SinkNextBatchType>(SinkNextBatchType value) {
+	switch(value) {
+	case SinkNextBatchType::READY:
+		return "READY";
+	case SinkNextBatchType::BLOCKED:
+		return "BLOCKED";
+	default:
+		throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
+	}
+}
+
+template<>
+SinkNextBatchType EnumUtil::FromString<SinkNextBatchType>(const char *value) {
+	if (StringUtil::Equals(value, "READY")) {
+		return SinkNextBatchType::READY;
+	}
+	if (StringUtil::Equals(value, "BLOCKED")) {
+		return SinkNextBatchType::BLOCKED;
+	}
+	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
+}
+
 template<>
 const char* EnumUtil::ToChars<SinkResultType>(SinkResultType value) {
 	switch(value) {
@@ -6010,6 +6072,8 @@ const char* EnumUtil::ToChars<UnionInvalidReason>(UnionInvalidReason value) {
 		return "VALIDITY_OVERLAP";
 	case UnionInvalidReason::TAG_MISMATCH:
 		return "TAG_MISMATCH";
+	case UnionInvalidReason::NULL_TAG:
+		return "NULL_TAG";
 	default:
 		throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
 	}
@@ -6032,6 +6096,9 @@ UnionInvalidReason EnumUtil::FromString<UnionInvalidReason>(const char *value) {
 	if (StringUtil::Equals(value, "TAG_MISMATCH")) {
 		return UnionInvalidReason::TAG_MISMATCH;
 	}
+	if (StringUtil::Equals(value, "NULL_TAG")) {
+		return UnionInvalidReason::NULL_TAG;
+	}
 	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }
 
package/src/duckdb/src/common/file_system.cpp
CHANGED
@@ -344,7 +344,7 @@ bool FileSystem::FileExists(const string &filename) {
 }
 
 bool FileSystem::IsPipe(const string &filename) {
-
+	return false;
 }
 
 void FileSystem::RemoveFile(const string &filename) {
@@ -500,6 +500,10 @@ bool FileHandle::CanSeek() {
 	return file_system.CanSeek();
 }
 
+bool FileHandle::IsPipe() {
+	return file_system.IsPipe(path);
+}
+
 string FileHandle::ReadLine() {
 	string result;
 	char buffer[1];
package/src/duckdb/src/common/hive_partitioning.cpp
CHANGED
@@ -64,7 +64,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
 // - s3://bucket/var1=value1/bla/bla/var2=value2
 // - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
 // - folder/folder/folder/../var1=value1/etc/.//var2=value2
-const string HivePartitioning::
+const string &HivePartitioning::RegexString() {
+	static string REGEX = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
+	return REGEX;
+}
 
 std::map<string, string> HivePartitioning::Parse(const string &filename, duckdb_re2::RE2 &regex) {
 	std::map<string, string> result;
@@ -79,7 +82,7 @@ std::map<string, string> HivePartitioning::Parse(const string &filename, duckdb_
 }
 
 std::map<string, string> HivePartitioning::Parse(const string &filename) {
-	duckdb_re2::RE2 regex(
+	duckdb_re2::RE2 regex(RegexString());
 	return Parse(filename, regex);
 }
 
@@ -94,7 +97,7 @@ void HivePartitioning::ApplyFiltersToFileList(ClientContext &context, vector<str
 	vector<bool> have_preserved_filter(filters.size(), false);
 	vector<unique_ptr<Expression>> pruned_filters;
 	unordered_set<idx_t> filters_applied_to_files;
-	duckdb_re2::RE2 regex(
+	duckdb_re2::RE2 regex(RegexString());
 	auto table_index = get.table_index;
 
 	if ((!filename_enabled && !hive_enabled) || filters.empty()) {
package/src/duckdb/src/common/multi_file_reader.cpp
CHANGED
@@ -102,7 +102,9 @@ bool MultiFileReader::ComplexFilterPushdown(ClientContext &context, vector<strin
 
 	unordered_map<string, column_t> column_map;
 	for (idx_t i = 0; i < get.column_ids.size(); i++) {
-
+		if (!IsRowIdColumnId(get.column_ids[i])) {
+			column_map.insert({get.names[get.column_ids[i]], i});
+		}
 	}
 
 	auto start_files = files.size();
@@ -432,7 +434,7 @@ void MultiFileReaderOptions::AutoDetectHiveTypesInternal(const string &file, Cli
 	}
 	Value value(part.second);
 	for (auto &candidate : candidates) {
-		const bool success = value.TryCastAs(context, candidate);
+		const bool success = value.TryCastAs(context, candidate, true);
 		if (success) {
 			hive_types_schema[name] = candidate;
 			break;
package/src/duckdb/src/common/types/list_segment.cpp
CHANGED
@@ -462,6 +462,10 @@ void SegmentPrimitiveFunction(ListSegmentFunctions &functions) {
 
 void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type) {
 
+	if (type.id() == LogicalTypeId::UNKNOWN) {
+		throw ParameterNotResolvedException();
+	}
+
 	auto physical_type = type.InternalType();
 	switch (physical_type) {
 	case PhysicalType::BIT:
|