duckdb 0.9.2-dev22.0 → 0.9.2-dev26.0
Sign up to get free protection for your applications and to get access to all the features.
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-timebucket.cpp +7 -10
- package/src/duckdb/extension/icu/icu-timezone.cpp +3 -0
- package/src/duckdb/extension/json/buffered_json_reader.cpp +11 -18
- package/src/duckdb/extension/json/json_scan.cpp +10 -5
- package/src/duckdb/extension/parquet/parquet_extension.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/view_catalog_entry.cpp +6 -12
- package/src/duckdb/src/catalog/catalog_set.cpp +3 -4
- package/src/duckdb/src/common/arrow/appender/union_data.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +17 -8
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +5 -1
- package/src/duckdb/src/common/enum_util.cpp +67 -0
- package/src/duckdb/src/common/file_system.cpp +5 -1
- package/src/duckdb/src/common/hive_partitioning.cpp +6 -3
- package/src/duckdb/src/common/multi_file_reader.cpp +4 -2
- package/src/duckdb/src/common/types/list_segment.cpp +4 -0
- package/src/duckdb/src/common/types/vector.cpp +66 -34
- package/src/duckdb/src/common/types.cpp +3 -1
- package/src/duckdb/src/core_functions/aggregate/holistic/quantile.cpp +84 -25
- package/src/duckdb/src/core_functions/function_list.cpp +2 -1
- package/src/duckdb/src/core_functions/scalar/date/strftime.cpp +8 -1
- package/src/duckdb/src/core_functions/scalar/math/numeric.cpp +23 -0
- package/src/duckdb/src/core_functions/scalar/string/jaccard.cpp +16 -23
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +27 -18
- package/src/duckdb/src/execution/index/art/art_key.cpp +4 -4
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +5 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +4 -3
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +25 -4
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +5 -2
- package/src/duckdb/src/execution/operator/schema/physical_drop.cpp +0 -1
- package/src/duckdb/src/execution/physical_operator.cpp +2 -1
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +5 -0
- package/src/duckdb/src/execution/window_executor.cpp +13 -1
- package/src/duckdb/src/function/cast/union/from_struct.cpp +24 -7
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +1 -1
- package/src/duckdb/src/function/function_set.cpp +1 -1
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -0
- package/src/duckdb/src/function/scalar/string/concat.cpp +4 -1
- package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +32 -0
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +46 -2
- package/src/duckdb/src/function/table/arrow.cpp +19 -17
- package/src/duckdb/src/function/table/arrow_conversion.cpp +67 -31
- package/src/duckdb/src/function/table/copy_csv.cpp +3 -3
- package/src/duckdb/src/function/table/system/pragma_user_agent.cpp +50 -0
- package/src/duckdb/src/function/table/system_functions.cpp +1 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/enum_data.hpp +8 -2
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +74 -4
- package/src/duckdb/src/include/duckdb/common/arrow/appender/map_data.hpp +82 -3
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +19 -9
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/enums/operator_result_type.hpp +6 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/filename_pattern.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/pipe_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +14 -2
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/distributive_functions.hpp +6 -0
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/quantile_enum.hpp +21 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/math_functions.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp +6 -3
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +6 -0
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +3 -1
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +37 -2
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +4 -0
- package/src/duckdb/src/include/duckdb/function/udf_function.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/extension/generated_extension_loader.hpp +5 -4
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +12 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +18 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/rule.hpp +0 -2
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +8 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_info.hpp +5 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_view_info.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parser/statement/create_statement.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +5 -7
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +0 -1
- package/src/duckdb/src/include/duckdb.h +1 -1
- package/src/duckdb/src/main/capi/config-c.cpp +1 -0
- package/src/duckdb/src/main/capi/duckdb-c.cpp +9 -1
- package/src/duckdb/src/main/config.cpp +18 -0
- package/src/duckdb/src/main/database.cpp +1 -0
- package/src/duckdb/src/main/extension/extension_alias.cpp +2 -1
- package/src/duckdb/src/main/extension/extension_helper.cpp +5 -4
- package/src/duckdb/src/main/settings/settings.cpp +49 -0
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +0 -8
- package/src/duckdb/src/optimizer/filter_combiner.cpp +37 -23
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +7 -4
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +5 -4
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +15 -4
- package/src/duckdb/src/parallel/pipeline_executor.cpp +81 -40
- package/src/duckdb/src/parser/parsed_data/create_view_info.cpp +27 -0
- package/src/duckdb/src/parser/statement/create_statement.cpp +4 -0
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +16 -3
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +7 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +3 -2
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +3 -0
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +76 -2
- package/src/duckdb/src/storage/data_table.cpp +7 -1
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +14 -0
- package/src/duckdb/src/storage/storage_info.cpp +2 -1
- package/src/duckdb/src/storage/table/row_version_manager.cpp +5 -3
- package/src/duckdb/src/transaction/commit_state.cpp +1 -0
- package/src/duckdb/third_party/parquet/parquet_types.cpp +224 -221
- package/src/duckdb/third_party/parquet/parquet_types.h +0 -14
- package/src/duckdb/ub_src_common_arrow_appender.cpp +0 -4
- package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +2 -0
- package/test/columns.test.ts +1 -1
package/package.json
CHANGED
@@ -76,24 +76,21 @@ struct ICUTimeBucket : public ICUDateFunc {
|
|
76
76
|
|
77
77
|
static inline timestamp_t WidthConvertibleToDaysCommon(int32_t bucket_width_days, const timestamp_t ts,
|
78
78
|
const timestamp_t origin, icu::Calendar *calendar) {
|
79
|
-
const auto trunc_days = TruncationFactory(DatePartSpecifier::DAY);
|
80
79
|
const auto sub_days = SubtractFactory(DatePartSpecifier::DAY);
|
81
80
|
|
82
|
-
|
83
|
-
trunc_days(calendar, tmp_micros);
|
84
|
-
timestamp_t truncated_ts = GetTimeUnsafe(calendar, tmp_micros);
|
85
|
-
|
86
|
-
int64_t ts_days = sub_days(calendar, origin, truncated_ts);
|
81
|
+
int64_t ts_days = sub_days(calendar, origin, ts);
|
87
82
|
int64_t result_days = (ts_days / bucket_width_days) * bucket_width_days;
|
88
83
|
if (result_days < NumericLimits<int32_t>::Minimum() || result_days > NumericLimits<int32_t>::Maximum()) {
|
89
84
|
throw OutOfRangeException("Timestamp out of range");
|
90
85
|
}
|
91
|
-
|
92
|
-
|
93
|
-
|
86
|
+
timestamp_t bucket = Add(calendar, origin, interval_t {0, static_cast<int32_t>(result_days), 0});
|
87
|
+
if (ts < bucket) {
|
88
|
+
D_ASSERT(ts < origin);
|
89
|
+
bucket = Add(calendar, bucket, interval_t {0, -bucket_width_days, 0});
|
90
|
+
D_ASSERT(ts > bucket);
|
94
91
|
}
|
95
92
|
|
96
|
-
return
|
93
|
+
return bucket;
|
97
94
|
}
|
98
95
|
|
99
96
|
static inline timestamp_t WidthConvertibleToMonthsCommon(int32_t bucket_width_months, const timestamp_t ts,
|
@@ -81,6 +81,9 @@ static void ICUTimeZoneFunction(ClientContext &context, TableFunctionInput &data
|
|
81
81
|
break;
|
82
82
|
}
|
83
83
|
|
84
|
+
// What PG reports is the total offset for today,
|
85
|
+
// which is the ICU total offset (i.e., "raw") plus the DST offset.
|
86
|
+
raw_offset_ms += dst_offset_ms;
|
84
87
|
output.SetValue(2, index, Value::INTERVAL(Interval::FromMicro(raw_offset_ms * Interval::MICROS_PER_MSEC)));
|
85
88
|
output.SetValue(3, index, Value(dst_offset_ms != 0));
|
86
89
|
++index;
|
@@ -23,7 +23,7 @@ bool JSONFileHandle::IsOpen() const {
|
|
23
23
|
}
|
24
24
|
|
25
25
|
void JSONFileHandle::Close() {
|
26
|
-
if (IsOpen() && file_handle->
|
26
|
+
if (IsOpen() && !file_handle->IsPipe()) {
|
27
27
|
file_handle->Close();
|
28
28
|
file_handle = nullptr;
|
29
29
|
}
|
@@ -72,30 +72,23 @@ void JSONFileHandle::ReadAtPosition(char *pointer, idx_t size, idx_t position, b
|
|
72
72
|
D_ASSERT(size != 0);
|
73
73
|
if (plain_file_source) {
|
74
74
|
file_handle->Read(pointer, size, position);
|
75
|
-
|
76
|
-
|
77
|
-
return;
|
78
|
-
}
|
79
|
-
|
80
|
-
if (sample_run) { // Cache the buffer
|
75
|
+
} else if (sample_run) { // Cache the buffer
|
81
76
|
file_handle->Read(pointer, size, position);
|
82
|
-
actual_reads++;
|
83
77
|
|
84
78
|
cached_buffers.emplace_back(allocator.Allocate(size));
|
85
79
|
memcpy(cached_buffers.back().get(), pointer, size);
|
86
80
|
cached_size += size;
|
81
|
+
} else {
|
82
|
+
if (!cached_buffers.empty() || position < cached_size) {
|
83
|
+
ReadFromCache(pointer, size, position);
|
84
|
+
}
|
87
85
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
if (!cached_buffers.empty() || position < cached_size) {
|
92
|
-
ReadFromCache(pointer, size, position);
|
93
|
-
actual_reads++;
|
86
|
+
if (size != 0) {
|
87
|
+
file_handle->Read(pointer, size, position);
|
88
|
+
}
|
94
89
|
}
|
95
|
-
|
96
|
-
|
97
|
-
file_handle->Read(pointer, size, position);
|
98
|
-
actual_reads++;
|
90
|
+
if (++actual_reads > requested_reads) {
|
91
|
+
throw InternalException("JSONFileHandle performed more actual reads than requested reads");
|
99
92
|
}
|
100
93
|
}
|
101
94
|
|
@@ -214,17 +214,22 @@ unique_ptr<GlobalTableFunctionState> JSONGlobalTableFunctionState::Init(ClientCo
|
|
214
214
|
|
215
215
|
idx_t JSONGlobalTableFunctionState::MaxThreads() const {
|
216
216
|
auto &bind_data = state.bind_data;
|
217
|
-
if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
|
218
|
-
return state.system_threads;
|
219
|
-
}
|
220
217
|
|
221
218
|
if (!state.json_readers.empty() && state.json_readers[0]->HasFileHandle()) {
|
219
|
+
// We opened and auto-detected a file, so we can get a better estimate
|
222
220
|
auto &reader = *state.json_readers[0];
|
223
|
-
if (
|
224
|
-
|
221
|
+
if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED ||
|
222
|
+
reader.GetFormat() == JSONFormat::NEWLINE_DELIMITED) {
|
223
|
+
return MaxValue<idx_t>(state.json_readers[0]->GetFileHandle().FileSize() / bind_data.maximum_object_size,
|
224
|
+
1);
|
225
225
|
}
|
226
226
|
}
|
227
227
|
|
228
|
+
if (bind_data.options.format == JSONFormat::NEWLINE_DELIMITED) {
|
229
|
+
// We haven't opened any files, so this is our best bet
|
230
|
+
return state.system_threads;
|
231
|
+
}
|
232
|
+
|
228
233
|
// One reader per file
|
229
234
|
return bind_data.files.size();
|
230
235
|
}
|
@@ -740,8 +740,8 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
|
|
740
740
|
}
|
741
741
|
}
|
742
742
|
|
743
|
-
unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
|
744
|
-
vector<LogicalType> &sql_types) {
|
743
|
+
unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
|
744
|
+
const vector<LogicalType> &sql_types) {
|
745
745
|
D_ASSERT(names.size() == sql_types.size());
|
746
746
|
bool row_group_size_bytes_set = false;
|
747
747
|
auto bind_data = make_uniq<ParquetWriteBindData>();
|
@@ -32,6 +32,7 @@ unique_ptr<CreateInfo> ViewCatalogEntry::GetInfo() const {
|
|
32
32
|
result->query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
|
33
33
|
result->aliases = aliases;
|
34
34
|
result->types = types;
|
35
|
+
result->temporary = temporary;
|
35
36
|
return std::move(result);
|
36
37
|
}
|
37
38
|
|
@@ -58,23 +59,16 @@ string ViewCatalogEntry::ToSQL() const {
|
|
58
59
|
//! Return empty sql with view name so pragma view_tables don't complain
|
59
60
|
return sql;
|
60
61
|
}
|
61
|
-
|
62
|
+
auto info = GetInfo();
|
63
|
+
auto result = info->ToString();
|
64
|
+
return result + ";\n";
|
62
65
|
}
|
63
66
|
|
64
67
|
unique_ptr<CatalogEntry> ViewCatalogEntry::Copy(ClientContext &context) const {
|
65
68
|
D_ASSERT(!internal);
|
66
|
-
|
67
|
-
create_info.query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
|
68
|
-
for (idx_t i = 0; i < aliases.size(); i++) {
|
69
|
-
create_info.aliases.push_back(aliases[i]);
|
70
|
-
}
|
71
|
-
for (idx_t i = 0; i < types.size(); i++) {
|
72
|
-
create_info.types.push_back(types[i]);
|
73
|
-
}
|
74
|
-
create_info.temporary = temporary;
|
75
|
-
create_info.sql = sql;
|
69
|
+
auto create_info = GetInfo();
|
76
70
|
|
77
|
-
return make_uniq<ViewCatalogEntry>(catalog, schema, create_info);
|
71
|
+
return make_uniq<ViewCatalogEntry>(catalog, schema, create_info->Cast<CreateViewInfo>());
|
78
72
|
}
|
79
73
|
|
80
74
|
} // namespace duckdb
|
@@ -199,6 +199,8 @@ bool CatalogSet::AlterOwnership(CatalogTransaction transaction, ChangeOwnershipI
|
|
199
199
|
bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name, AlterInfo &alter_info) {
|
200
200
|
// lock the catalog for writing
|
201
201
|
lock_guard<mutex> write_lock(catalog.GetWriteLock());
|
202
|
+
// lock this catalog set to disallow reading
|
203
|
+
lock_guard<mutex> read_lock(catalog_lock);
|
202
204
|
|
203
205
|
// first check if the entry exists in the unordered set
|
204
206
|
EntryIndex entry_index;
|
@@ -210,9 +212,6 @@ bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name,
|
|
210
212
|
throw CatalogException("Cannot alter entry \"%s\" because it is an internal system entry", entry->name);
|
211
213
|
}
|
212
214
|
|
213
|
-
// lock this catalog set to disallow reading
|
214
|
-
lock_guard<mutex> read_lock(catalog_lock);
|
215
|
-
|
216
215
|
// create a new entry and replace the currently stored one
|
217
216
|
// set the timestamp to the timestamp of the current transaction
|
218
217
|
// and point it to the updated table node
|
@@ -316,6 +315,7 @@ void CatalogSet::DropEntryInternal(CatalogTransaction transaction, EntryIndex en
|
|
316
315
|
bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, bool cascade, bool allow_drop_internal) {
|
317
316
|
// lock the catalog for writing
|
318
317
|
lock_guard<mutex> write_lock(catalog.GetWriteLock());
|
318
|
+
lock_guard<mutex> read_lock(catalog_lock);
|
319
319
|
// we can only delete an entry that exists
|
320
320
|
EntryIndex entry_index;
|
321
321
|
auto entry = GetEntryInternal(transaction, name, &entry_index);
|
@@ -326,7 +326,6 @@ bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, b
|
|
326
326
|
throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name);
|
327
327
|
}
|
328
328
|
|
329
|
-
lock_guard<mutex> read_lock(catalog_lock);
|
330
329
|
DropEntryInternal(transaction, std::move(entry_index), *entry, cascade);
|
331
330
|
return true;
|
332
331
|
}
|
@@ -24,7 +24,7 @@ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t f
|
|
24
24
|
|
25
25
|
duckdb::vector<Vector> child_vectors;
|
26
26
|
for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
|
27
|
-
child_vectors.emplace_back(child.second);
|
27
|
+
child_vectors.emplace_back(child.second, size);
|
28
28
|
}
|
29
29
|
|
30
30
|
for (idx_t input_idx = from; input_idx < to; input_idx++) {
|
@@ -193,26 +193,26 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
|
|
193
193
|
if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
|
194
194
|
InitializeAppenderForType<ArrowVarcharData<string_t>>(append_data);
|
195
195
|
} else {
|
196
|
-
InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter,
|
196
|
+
InitializeAppenderForType<ArrowVarcharData<string_t, ArrowVarcharConverter, int32_t>>(append_data);
|
197
197
|
}
|
198
198
|
break;
|
199
199
|
case LogicalTypeId::UUID:
|
200
200
|
if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
|
201
201
|
InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter>>(append_data);
|
202
202
|
} else {
|
203
|
-
InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter,
|
203
|
+
InitializeAppenderForType<ArrowVarcharData<hugeint_t, ArrowUUIDConverter, int32_t>>(append_data);
|
204
204
|
}
|
205
205
|
break;
|
206
206
|
case LogicalTypeId::ENUM:
|
207
207
|
switch (type.InternalType()) {
|
208
208
|
case PhysicalType::UINT8:
|
209
|
-
InitializeAppenderForType<ArrowEnumData<
|
209
|
+
InitializeAppenderForType<ArrowEnumData<int8_t>>(append_data);
|
210
210
|
break;
|
211
211
|
case PhysicalType::UINT16:
|
212
|
-
InitializeAppenderForType<ArrowEnumData<
|
212
|
+
InitializeAppenderForType<ArrowEnumData<int16_t>>(append_data);
|
213
213
|
break;
|
214
214
|
case PhysicalType::UINT32:
|
215
|
-
InitializeAppenderForType<ArrowEnumData<
|
215
|
+
InitializeAppenderForType<ArrowEnumData<int32_t>>(append_data);
|
216
216
|
break;
|
217
217
|
default:
|
218
218
|
throw InternalException("Unsupported internal enum type");
|
@@ -227,11 +227,20 @@ static void InitializeFunctionPointers(ArrowAppendData &append_data, const Logic
|
|
227
227
|
case LogicalTypeId::STRUCT:
|
228
228
|
InitializeAppenderForType<ArrowStructData>(append_data);
|
229
229
|
break;
|
230
|
-
case LogicalTypeId::LIST:
|
231
|
-
|
230
|
+
case LogicalTypeId::LIST: {
|
231
|
+
if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
|
232
|
+
InitializeAppenderForType<ArrowListData<int64_t>>(append_data);
|
233
|
+
} else {
|
234
|
+
InitializeAppenderForType<ArrowListData<int32_t>>(append_data);
|
235
|
+
}
|
232
236
|
break;
|
237
|
+
}
|
233
238
|
case LogicalTypeId::MAP:
|
234
|
-
|
239
|
+
if (append_data.options.arrow_offset_size == ArrowOffsetSize::LARGE) {
|
240
|
+
InitializeAppenderForType<ArrowMapData<int64_t>>(append_data);
|
241
|
+
} else {
|
242
|
+
InitializeAppenderForType<ArrowMapData<int32_t>>(append_data);
|
243
|
+
}
|
235
244
|
break;
|
236
245
|
default:
|
237
246
|
throw NotImplementedException("Unsupported type in DuckDB -> Arrow Conversion: %s\n", type.ToString());
|
@@ -187,7 +187,11 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
|
|
187
187
|
break;
|
188
188
|
}
|
189
189
|
case LogicalTypeId::LIST: {
|
190
|
-
|
190
|
+
if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
|
191
|
+
child.format = "+L";
|
192
|
+
} else {
|
193
|
+
child.format = "+l";
|
194
|
+
}
|
191
195
|
child.n_children = 1;
|
192
196
|
root_holder.nested_children.emplace_back();
|
193
197
|
root_holder.nested_children.back().resize(1);
|
@@ -64,6 +64,7 @@
|
|
64
64
|
#include "duckdb/common/types/timestamp.hpp"
|
65
65
|
#include "duckdb/common/types/vector.hpp"
|
66
66
|
#include "duckdb/common/types/vector_buffer.hpp"
|
67
|
+
#include "duckdb/core_functions/aggregate/quantile_enum.hpp"
|
67
68
|
#include "duckdb/execution/index/art/art.hpp"
|
68
69
|
#include "duckdb/execution/index/art/node.hpp"
|
69
70
|
#include "duckdb/execution/operator/scan/csv/base_csv_reader.hpp"
|
@@ -4571,6 +4572,44 @@ ProfilerPrintFormat EnumUtil::FromString<ProfilerPrintFormat>(const char *value)
|
|
4571
4572
|
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
|
4572
4573
|
}
|
4573
4574
|
|
4575
|
+
template<>
|
4576
|
+
const char* EnumUtil::ToChars<QuantileSerializationType>(QuantileSerializationType value) {
|
4577
|
+
switch(value) {
|
4578
|
+
case QuantileSerializationType::NON_DECIMAL:
|
4579
|
+
return "NON_DECIMAL";
|
4580
|
+
case QuantileSerializationType::DECIMAL_DISCRETE:
|
4581
|
+
return "DECIMAL_DISCRETE";
|
4582
|
+
case QuantileSerializationType::DECIMAL_DISCRETE_LIST:
|
4583
|
+
return "DECIMAL_DISCRETE_LIST";
|
4584
|
+
case QuantileSerializationType::DECIMAL_CONTINUOUS:
|
4585
|
+
return "DECIMAL_CONTINUOUS";
|
4586
|
+
case QuantileSerializationType::DECIMAL_CONTINUOUS_LIST:
|
4587
|
+
return "DECIMAL_CONTINUOUS_LIST";
|
4588
|
+
default:
|
4589
|
+
throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
|
4590
|
+
}
|
4591
|
+
}
|
4592
|
+
|
4593
|
+
template<>
|
4594
|
+
QuantileSerializationType EnumUtil::FromString<QuantileSerializationType>(const char *value) {
|
4595
|
+
if (StringUtil::Equals(value, "NON_DECIMAL")) {
|
4596
|
+
return QuantileSerializationType::NON_DECIMAL;
|
4597
|
+
}
|
4598
|
+
if (StringUtil::Equals(value, "DECIMAL_DISCRETE")) {
|
4599
|
+
return QuantileSerializationType::DECIMAL_DISCRETE;
|
4600
|
+
}
|
4601
|
+
if (StringUtil::Equals(value, "DECIMAL_DISCRETE_LIST")) {
|
4602
|
+
return QuantileSerializationType::DECIMAL_DISCRETE_LIST;
|
4603
|
+
}
|
4604
|
+
if (StringUtil::Equals(value, "DECIMAL_CONTINUOUS")) {
|
4605
|
+
return QuantileSerializationType::DECIMAL_CONTINUOUS;
|
4606
|
+
}
|
4607
|
+
if (StringUtil::Equals(value, "DECIMAL_CONTINUOUS_LIST")) {
|
4608
|
+
return QuantileSerializationType::DECIMAL_CONTINUOUS_LIST;
|
4609
|
+
}
|
4610
|
+
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
|
4611
|
+
}
|
4612
|
+
|
4574
4613
|
template<>
|
4575
4614
|
const char* EnumUtil::ToChars<QueryNodeType>(QueryNodeType value) {
|
4576
4615
|
switch(value) {
|
@@ -5118,6 +5157,29 @@ SinkFinalizeType EnumUtil::FromString<SinkFinalizeType>(const char *value) {
|
|
5118
5157
|
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
|
5119
5158
|
}
|
5120
5159
|
|
5160
|
+
template<>
|
5161
|
+
const char* EnumUtil::ToChars<SinkNextBatchType>(SinkNextBatchType value) {
|
5162
|
+
switch(value) {
|
5163
|
+
case SinkNextBatchType::READY:
|
5164
|
+
return "READY";
|
5165
|
+
case SinkNextBatchType::BLOCKED:
|
5166
|
+
return "BLOCKED";
|
5167
|
+
default:
|
5168
|
+
throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
|
5169
|
+
}
|
5170
|
+
}
|
5171
|
+
|
5172
|
+
template<>
|
5173
|
+
SinkNextBatchType EnumUtil::FromString<SinkNextBatchType>(const char *value) {
|
5174
|
+
if (StringUtil::Equals(value, "READY")) {
|
5175
|
+
return SinkNextBatchType::READY;
|
5176
|
+
}
|
5177
|
+
if (StringUtil::Equals(value, "BLOCKED")) {
|
5178
|
+
return SinkNextBatchType::BLOCKED;
|
5179
|
+
}
|
5180
|
+
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
|
5181
|
+
}
|
5182
|
+
|
5121
5183
|
template<>
|
5122
5184
|
const char* EnumUtil::ToChars<SinkResultType>(SinkResultType value) {
|
5123
5185
|
switch(value) {
|
@@ -6010,6 +6072,8 @@ const char* EnumUtil::ToChars<UnionInvalidReason>(UnionInvalidReason value) {
|
|
6010
6072
|
return "VALIDITY_OVERLAP";
|
6011
6073
|
case UnionInvalidReason::TAG_MISMATCH:
|
6012
6074
|
return "TAG_MISMATCH";
|
6075
|
+
case UnionInvalidReason::NULL_TAG:
|
6076
|
+
return "NULL_TAG";
|
6013
6077
|
default:
|
6014
6078
|
throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
|
6015
6079
|
}
|
@@ -6032,6 +6096,9 @@ UnionInvalidReason EnumUtil::FromString<UnionInvalidReason>(const char *value) {
|
|
6032
6096
|
if (StringUtil::Equals(value, "TAG_MISMATCH")) {
|
6033
6097
|
return UnionInvalidReason::TAG_MISMATCH;
|
6034
6098
|
}
|
6099
|
+
if (StringUtil::Equals(value, "NULL_TAG")) {
|
6100
|
+
return UnionInvalidReason::NULL_TAG;
|
6101
|
+
}
|
6035
6102
|
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
|
6036
6103
|
}
|
6037
6104
|
|
@@ -344,7 +344,7 @@ bool FileSystem::FileExists(const string &filename) {
|
|
344
344
|
}
|
345
345
|
|
346
346
|
bool FileSystem::IsPipe(const string &filename) {
|
347
|
-
|
347
|
+
return false;
|
348
348
|
}
|
349
349
|
|
350
350
|
void FileSystem::RemoveFile(const string &filename) {
|
@@ -500,6 +500,10 @@ bool FileHandle::CanSeek() {
|
|
500
500
|
return file_system.CanSeek();
|
501
501
|
}
|
502
502
|
|
503
|
+
bool FileHandle::IsPipe() {
|
504
|
+
return file_system.IsPipe(path);
|
505
|
+
}
|
506
|
+
|
503
507
|
string FileHandle::ReadLine() {
|
504
508
|
string result;
|
505
509
|
char buffer[1];
|
@@ -64,7 +64,10 @@ static void ConvertKnownColRefToConstants(unique_ptr<Expression> &expr,
|
|
64
64
|
// - s3://bucket/var1=value1/bla/bla/var2=value2
|
65
65
|
// - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
|
66
66
|
// - folder/folder/folder/../var1=value1/etc/.//var2=value2
|
67
|
-
const string HivePartitioning::
|
67
|
+
const string &HivePartitioning::RegexString() {
|
68
|
+
static string REGEX = "[\\/\\\\]([^\\/\\?\\\\]+)=([^\\/\\n\\?\\\\]+)";
|
69
|
+
return REGEX;
|
70
|
+
}
|
68
71
|
|
69
72
|
std::map<string, string> HivePartitioning::Parse(const string &filename, duckdb_re2::RE2 &regex) {
|
70
73
|
std::map<string, string> result;
|
@@ -79,7 +82,7 @@ std::map<string, string> HivePartitioning::Parse(const string &filename, duckdb_
|
|
79
82
|
}
|
80
83
|
|
81
84
|
std::map<string, string> HivePartitioning::Parse(const string &filename) {
|
82
|
-
duckdb_re2::RE2 regex(
|
85
|
+
duckdb_re2::RE2 regex(RegexString());
|
83
86
|
return Parse(filename, regex);
|
84
87
|
}
|
85
88
|
|
@@ -94,7 +97,7 @@ void HivePartitioning::ApplyFiltersToFileList(ClientContext &context, vector<str
|
|
94
97
|
vector<bool> have_preserved_filter(filters.size(), false);
|
95
98
|
vector<unique_ptr<Expression>> pruned_filters;
|
96
99
|
unordered_set<idx_t> filters_applied_to_files;
|
97
|
-
duckdb_re2::RE2 regex(
|
100
|
+
duckdb_re2::RE2 regex(RegexString());
|
98
101
|
auto table_index = get.table_index;
|
99
102
|
|
100
103
|
if ((!filename_enabled && !hive_enabled) || filters.empty()) {
|
@@ -102,7 +102,9 @@ bool MultiFileReader::ComplexFilterPushdown(ClientContext &context, vector<strin
|
|
102
102
|
|
103
103
|
unordered_map<string, column_t> column_map;
|
104
104
|
for (idx_t i = 0; i < get.column_ids.size(); i++) {
|
105
|
-
|
105
|
+
if (!IsRowIdColumnId(get.column_ids[i])) {
|
106
|
+
column_map.insert({get.names[get.column_ids[i]], i});
|
107
|
+
}
|
106
108
|
}
|
107
109
|
|
108
110
|
auto start_files = files.size();
|
@@ -432,7 +434,7 @@ void MultiFileReaderOptions::AutoDetectHiveTypesInternal(const string &file, Cli
|
|
432
434
|
}
|
433
435
|
Value value(part.second);
|
434
436
|
for (auto &candidate : candidates) {
|
435
|
-
const bool success = value.TryCastAs(context, candidate);
|
437
|
+
const bool success = value.TryCastAs(context, candidate, true);
|
436
438
|
if (success) {
|
437
439
|
hive_types_schema[name] = candidate;
|
438
440
|
break;
|
@@ -462,6 +462,10 @@ void SegmentPrimitiveFunction(ListSegmentFunctions &functions) {
|
|
462
462
|
|
463
463
|
void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type) {
|
464
464
|
|
465
|
+
if (type.id() == LogicalTypeId::UNKNOWN) {
|
466
|
+
throw ParameterNotResolvedException();
|
467
|
+
}
|
468
|
+
|
465
469
|
auto physical_type = type.InternalType();
|
466
470
|
switch (physical_type) {
|
467
471
|
case PhysicalType::BIT:
|
@@ -1131,9 +1131,12 @@ void Vector::VerifyMap(Vector &vector_p, const SelectionVector &sel_p, idx_t cou
|
|
1131
1131
|
|
1132
1132
|
void Vector::VerifyUnion(Vector &vector_p, const SelectionVector &sel_p, idx_t count) {
|
1133
1133
|
#ifdef DEBUG
|
1134
|
+
|
1134
1135
|
D_ASSERT(vector_p.GetType().id() == LogicalTypeId::UNION);
|
1135
1136
|
auto valid_check = UnionVector::CheckUnionValidity(vector_p, count, sel_p);
|
1136
|
-
|
1137
|
+
if (valid_check != UnionInvalidReason::VALID) {
|
1138
|
+
throw InternalException("Union not valid, reason: %s", EnumUtil::ToString(valid_check));
|
1139
|
+
}
|
1137
1140
|
#endif // DEBUG
|
1138
1141
|
}
|
1139
1142
|
|
@@ -1250,7 +1253,8 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
|
|
1250
1253
|
}
|
1251
1254
|
|
1252
1255
|
if (vector->GetType().id() == LogicalTypeId::UNION) {
|
1253
|
-
|
1256
|
+
// Pass in raw vector
|
1257
|
+
VerifyUnion(vector_p, sel_p, count);
|
1254
1258
|
}
|
1255
1259
|
}
|
1256
1260
|
|
@@ -1911,7 +1915,13 @@ void UnionVector::SetToMember(Vector &union_vector, union_tag_t tag, Vector &mem
|
|
1911
1915
|
// if the member vector is constant, we can set the union to constant as well
|
1912
1916
|
union_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
|
1913
1917
|
ConstantVector::GetData<union_tag_t>(tag_vector)[0] = tag;
|
1914
|
-
|
1918
|
+
if (keep_tags_for_null) {
|
1919
|
+
ConstantVector::SetNull(union_vector, false);
|
1920
|
+
ConstantVector::SetNull(tag_vector, false);
|
1921
|
+
} else {
|
1922
|
+
ConstantVector::SetNull(union_vector, ConstantVector::IsNull(member_vector));
|
1923
|
+
ConstantVector::SetNull(tag_vector, ConstantVector::IsNull(member_vector));
|
1924
|
+
}
|
1915
1925
|
|
1916
1926
|
} else {
|
1917
1927
|
// otherwise flatten and set to flatvector
|
@@ -1962,53 +1972,75 @@ union_tag_t UnionVector::GetTag(const Vector &vector, idx_t index) {
|
|
1962
1972
|
return FlatVector::GetData<union_tag_t>(tag_vector)[index];
|
1963
1973
|
}
|
1964
1974
|
|
1965
|
-
|
1966
|
-
|
1967
|
-
|
1975
|
+
//! Raw selection vector passed in (not merged with any other selection vectors)
|
1976
|
+
UnionInvalidReason UnionVector::CheckUnionValidity(Vector &vector_p, idx_t count, const SelectionVector &sel_p) {
|
1977
|
+
D_ASSERT(vector_p.GetType().id() == LogicalTypeId::UNION);
|
1978
|
+
|
1979
|
+
// Will contain the (possibly) merged selection vector
|
1980
|
+
const SelectionVector *sel = &sel_p;
|
1981
|
+
SelectionVector owned_sel;
|
1982
|
+
Vector *vector = &vector_p;
|
1983
|
+
if (vector->GetVectorType() == VectorType::DICTIONARY_VECTOR) {
|
1984
|
+
// In the case of a dictionary vector, unwrap the Vector, and merge the selection vectors.
|
1985
|
+
auto &child = DictionaryVector::Child(*vector);
|
1986
|
+
D_ASSERT(child.GetVectorType() != VectorType::DICTIONARY_VECTOR);
|
1987
|
+
auto &dict_sel = DictionaryVector::SelVector(*vector);
|
1988
|
+
// merge the selection vectors and verify the child
|
1989
|
+
auto new_buffer = dict_sel.Slice(*sel, count);
|
1990
|
+
owned_sel.Initialize(new_buffer);
|
1991
|
+
sel = &owned_sel;
|
1992
|
+
vector = &child;
|
1993
|
+
} else if (vector->GetVectorType() == VectorType::CONSTANT_VECTOR) {
|
1994
|
+
sel = ConstantVector::ZeroSelectionVector(count, owned_sel);
|
1995
|
+
}
|
1996
|
+
|
1997
|
+
auto member_count = UnionType::GetMemberCount(vector_p.GetType());
|
1968
1998
|
if (member_count == 0) {
|
1969
1999
|
return UnionInvalidReason::NO_MEMBERS;
|
1970
2000
|
}
|
1971
2001
|
|
1972
|
-
UnifiedVectorFormat
|
1973
|
-
|
2002
|
+
UnifiedVectorFormat vector_vdata;
|
2003
|
+
vector_p.ToUnifiedFormat(count, vector_vdata);
|
1974
2004
|
|
1975
|
-
|
1976
|
-
|
1977
|
-
|
2005
|
+
auto &entries = StructVector::GetEntries(vector_p);
|
2006
|
+
duckdb::vector<UnifiedVectorFormat> child_vdata(entries.size());
|
2007
|
+
for (idx_t entry_idx = 0; entry_idx < entries.size(); entry_idx++) {
|
2008
|
+
auto &child = *entries[entry_idx];
|
2009
|
+
child.ToUnifiedFormat(count, child_vdata[entry_idx]);
|
2010
|
+
}
|
2011
|
+
|
2012
|
+
auto &tag_vdata = child_vdata[0];
|
1978
2013
|
|
1979
|
-
// check that only one member is valid at a time
|
1980
2014
|
for (idx_t row_idx = 0; row_idx < count; row_idx++) {
|
1981
|
-
auto
|
1982
|
-
if (!union_vdata.validity.RowIsValid(union_mapped_row_idx)) {
|
1983
|
-
continue;
|
1984
|
-
}
|
2015
|
+
auto mapped_idx = sel->get_index(row_idx);
|
1985
2016
|
|
1986
|
-
|
1987
|
-
if (!tags_vdata.validity.RowIsValid(tag_mapped_row_idx)) {
|
2017
|
+
if (!vector_vdata.validity.RowIsValid(mapped_idx)) {
|
1988
2018
|
continue;
|
1989
2019
|
}
|
1990
2020
|
|
1991
|
-
auto
|
2021
|
+
auto tag_idx = tag_vdata.sel->get_index(sel_p.get_index(row_idx));
|
2022
|
+
if (!tag_vdata.validity.RowIsValid(tag_idx)) {
|
2023
|
+
// we can't have NULL tags!
|
2024
|
+
return UnionInvalidReason::NULL_TAG;
|
2025
|
+
}
|
2026
|
+
auto tag = UnifiedVectorFormat::GetData<union_tag_t>(tag_vdata)[tag_idx];
|
1992
2027
|
if (tag >= member_count) {
|
1993
2028
|
return UnionInvalidReason::TAG_OUT_OF_RANGE;
|
1994
2029
|
}
|
1995
2030
|
|
1996
2031
|
bool found_valid = false;
|
1997
|
-
for (idx_t
|
1998
|
-
|
1999
|
-
|
2000
|
-
|
2001
|
-
|
2002
|
-
|
2003
|
-
|
2004
|
-
|
2005
|
-
|
2006
|
-
|
2007
|
-
|
2008
|
-
|
2009
|
-
if (tag != static_cast<union_tag_t>(member_idx)) {
|
2010
|
-
return UnionInvalidReason::TAG_MISMATCH;
|
2011
|
-
}
|
2032
|
+
for (idx_t i = 0; i < member_count; i++) {
|
2033
|
+
auto &member_vdata = child_vdata[1 + i]; // skip the tag
|
2034
|
+
idx_t member_idx = member_vdata.sel->get_index(sel_p.get_index(row_idx));
|
2035
|
+
if (!member_vdata.validity.RowIsValid(member_idx)) {
|
2036
|
+
continue;
|
2037
|
+
}
|
2038
|
+
if (found_valid) {
|
2039
|
+
return UnionInvalidReason::VALIDITY_OVERLAP;
|
2040
|
+
}
|
2041
|
+
found_valid = true;
|
2042
|
+
if (tag != static_cast<union_tag_t>(i)) {
|
2043
|
+
return UnionInvalidReason::TAG_MISMATCH;
|
2012
2044
|
}
|
2013
2045
|
}
|
2014
2046
|
}
|
@@ -373,7 +373,9 @@ string LogicalType::ToString() const {
|
|
373
373
|
string ret = "UNION(";
|
374
374
|
size_t count = UnionType::GetMemberCount(*this);
|
375
375
|
for (size_t i = 0; i < count; i++) {
|
376
|
-
|
376
|
+
auto member_name = UnionType::GetMemberName(*this, i);
|
377
|
+
auto member_type = UnionType::GetMemberType(*this, i).ToString();
|
378
|
+
ret += StringUtil::Format("%s %s", SQLIdentifier(member_name), member_type);
|
377
379
|
if (i < count - 1) {
|
378
380
|
ret += ", ";
|
379
381
|
}
|