duckdb 1.2.1-dev4.0 → 1.2.1-dev8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/connection.cpp +57 -35
- package/src/duckdb/extension/core_functions/aggregate/distributive/string_agg.cpp +14 -22
- package/src/duckdb/extension/core_functions/aggregate/nested/list.cpp +0 -1
- package/src/duckdb/extension/core_functions/lambda_functions.cpp +0 -11
- package/src/duckdb/extension/core_functions/scalar/list/list_aggregates.cpp +18 -6
- package/src/duckdb/extension/icu/icu-datefunc.cpp +9 -2
- package/src/duckdb/extension/icu/icu-strptime.cpp +7 -11
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +3 -1
- package/src/duckdb/extension/json/buffered_json_reader.cpp +18 -31
- package/src/duckdb/extension/json/json_extension.cpp +8 -3
- package/src/duckdb/extension/parquet/column_reader.cpp +4 -6
- package/src/duckdb/extension/parquet/column_writer.cpp +33 -12
- package/src/duckdb/extension/parquet/include/column_reader.hpp +0 -2
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +0 -1
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +1 -2
- package/src/duckdb/src/catalog/catalog.cpp +12 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +8 -8
- package/src/duckdb/src/common/bind_helpers.cpp +3 -0
- package/src/duckdb/src/common/compressed_file_system.cpp +2 -0
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +3 -3
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +1 -1
- package/src/duckdb/src/execution/index/art/art.cpp +19 -6
- package/src/duckdb/src/execution/index/art/iterator.cpp +7 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +11 -4
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +5 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +3 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +20 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +19 -22
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +1 -0
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +16 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +1 -0
- package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +16 -7
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +3 -1
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +11 -1
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +5 -7
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -0
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +1 -3
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +14 -5
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +24 -12
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +3 -3
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +3 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +108 -48
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +5 -5
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +6 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +10 -9
- package/src/duckdb/src/include/duckdb/common/adbc/adbc-init.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/art/iterator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_streaming_sample.hpp +3 -7
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +11 -3
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/client_context_state.hpp +4 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +25 -7
- package/src/duckdb/src/include/duckdb/main/pending_query_result.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +7 -0
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +2 -1
- package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +11 -5
- package/src/duckdb/src/include/duckdb/parallel/executor_task.hpp +4 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +0 -1
- package/src/duckdb/src/include/duckdb/parallel/task_executor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/task_notifier.hpp +27 -0
- package/src/duckdb/src/include/duckdb/parallel/task_scheduler.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/planner/tableref/bound_cteref.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +3 -2
- package/src/duckdb/src/include/duckdb.h +495 -480
- package/src/duckdb/src/main/attached_database.cpp +1 -1
- package/src/duckdb/src/main/capi/duckdb-c.cpp +5 -1
- package/src/duckdb/src/main/capi/helper-c.cpp +8 -0
- package/src/duckdb/src/main/config.cpp +7 -1
- package/src/duckdb/src/main/database.cpp +8 -8
- package/src/duckdb/src/main/extension/extension_helper.cpp +3 -1
- package/src/duckdb/src/main/extension/extension_load.cpp +12 -12
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +1 -0
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -2
- package/src/duckdb/src/optimizer/late_materialization.cpp +26 -5
- package/src/duckdb/src/optimizer/optimizer.cpp +12 -1
- package/src/duckdb/src/parallel/executor_task.cpp +10 -6
- package/src/duckdb/src/parallel/task_executor.cpp +4 -1
- package/src/duckdb/src/parallel/task_notifier.cpp +23 -0
- package/src/duckdb/src/parallel/task_scheduler.cpp +33 -0
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +4 -1
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +4 -2
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +7 -2
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +6 -5
- package/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +4 -2
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +21 -10
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/storage_manager.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group.cpp +5 -6
- package/src/duckdb/src/storage/table/scan_state.cpp +6 -0
- package/src/duckdb/src/transaction/duck_transaction.cpp +11 -3
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +2 -2
- package/src/duckdb/third_party/concurrentqueue/concurrentqueue.h +17 -0
- package/src/duckdb/ub_src_parallel.cpp +2 -0
package/src/duckdb/extension/parquet/column_writer.cpp

@@ -309,6 +309,7 @@ struct PageInformation {
 	idx_t offset = 0;
 	idx_t row_count = 0;
 	idx_t empty_count = 0;
+	idx_t null_count = 0;
 	idx_t estimated_page_size = 0;
 };
 
@@ -388,7 +389,7 @@ protected:
 	virtual unique_ptr<ColumnWriterStatistics> InitializeStatsState();
 
 	//! Initialize the writer for a specific page. Only used for scalar types.
-	virtual unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state);
+	virtual unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state, idx_t page_idx);
 
 	//! Flushes the writer for a specific page. Only used for scalar types.
 	virtual void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state);
@@ -427,7 +428,8 @@ void BasicColumnWriter::RegisterToRowGroup(duckdb_parquet::RowGroup &row_group)
 	row_group.columns.push_back(std::move(column_chunk));
 }
 
-unique_ptr<ColumnWriterPageState> BasicColumnWriter::InitializePageState(BasicColumnWriterState &state) {
+unique_ptr<ColumnWriterPageState> BasicColumnWriter::InitializePageState(BasicColumnWriterState &state,
+                                                                         idx_t page_idx) {
 	return nullptr;
 }
 
@@ -463,6 +465,8 @@ void BasicColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *p
 			state.page_info.push_back(new_info);
 			page_info_ref = state.page_info.back();
 		}
+	} else {
+		page_info.null_count++;
 	}
 	vector_index++;
 }
@@ -502,7 +506,7 @@ void BasicColumnWriter::BeginWrite(ColumnWriterState &state_p) {
 		    MaxValue<idx_t>(NextPowerOfTwo(page_info.estimated_page_size), MemoryStream::DEFAULT_INITIAL_CAPACITY));
 		write_info.write_count = page_info.empty_count;
 		write_info.max_write_count = page_info.row_count;
-		write_info.page_state = InitializePageState(state);
+		write_info.page_state = InitializePageState(state, page_idx);
 
 		write_info.compressed_size = 0;
 		write_info.compressed_data = nullptr;
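The `BeginWrite` hunk above sizes each page's in-memory stream by rounding `estimated_page_size` up to the next power of two, floored at the stream's default capacity. A minimal standalone sketch of that rounding; the helper and the capacity constant are re-implemented here for illustration (DuckDB's own `NextPowerOfTwo` and `MemoryStream::DEFAULT_INITIAL_CAPACITY` live elsewhere in the codebase, and the constant's value below is assumed):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

using idx_t = uint64_t;

// Illustrative stand-in for duckdb's NextPowerOfTwo: round v up to a power of two.
static idx_t NextPowerOfTwo(idx_t v) {
	idx_t result = 1;
	while (result < v) {
		result <<= 1;
	}
	return result;
}

int main() {
	constexpr idx_t DEFAULT_INITIAL_CAPACITY = 512; // assumed value, for illustration only
	idx_t estimated_page_size = 3000;               // accumulated per-row size estimates
	idx_t capacity = std::max<idx_t>(NextPowerOfTwo(estimated_page_size), DEFAULT_INITIAL_CAPACITY);
	std::printf("allocating %llu bytes\n", static_cast<unsigned long long>(capacity)); // 4096
	return 0;
}
```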
@@ -796,7 +800,6 @@ public:
 };
 
 struct BaseParquetOperator {
-
 	template <class SRC, class TGT>
 	static void WriteToStream(const TGT &input, WriteStream &ser) {
 		ser.WriteData(const_data_ptr_cast(&input), sizeof(TGT));
@@ -815,6 +818,11 @@ struct BaseParquetOperator {
 	template <class SRC, class TGT>
 	static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
 	}
+
+	template <class SRC, class TGT>
+	static idx_t GetRowSize(const Vector &, idx_t) {
+		return sizeof(TGT);
+	}
 };
 
 struct ParquetCastOperator : public BaseParquetOperator {
@@ -936,6 +944,11 @@ struct ParquetStringOperator : public BaseParquetOperator {
 	static uint64_t XXHash64(const TGT &target_value) {
 		return duckdb_zstd::XXH64(target_value.GetData(), target_value.GetSize(), 0);
 	}
+
+	template <class SRC, class TGT>
+	static idx_t GetRowSize(const Vector &vector, idx_t index) {
+		return FlatVector::GetData<string_t>(vector)[index].GetSize();
+	}
 };
 
 struct ParquetIntervalTargetType {
@@ -1066,6 +1079,7 @@ public:
 	// analysis state for integer values for DELTA_BINARY_PACKED/DELTA_LENGTH_BYTE_ARRAY
 	idx_t total_value_count = 0;
 	idx_t total_string_size = 0;
+	uint32_t key_bit_width = 0;
 
 	unordered_map<T, uint32_t> dictionary;
 	duckdb_parquet::Encoding::type encoding;
@@ -1222,11 +1236,12 @@ public:
 		return std::move(result);
 	}
 
-	unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p) override {
+	unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p, idx_t page_idx) override {
 		auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
-
-		auto result = make_uniq<StandardWriterPageState<SRC, TGT>>(
-
+		const auto &page_info = state_p.page_info[page_idx];
+		auto result = make_uniq<StandardWriterPageState<SRC, TGT>>(
+		    page_info.row_count - (page_info.empty_count + page_info.null_count), state.total_string_size,
+		    state.encoding, state.dictionary);
 		return std::move(result);
 	}
 
@@ -1335,6 +1350,8 @@ public:
 			}
 		}
 		state.dictionary.clear();
+	} else {
+		state.key_bit_width = RleBpDecoder::ComputeBitWidth(state.dictionary.size());
 	}
 }
 
@@ -1488,9 +1505,13 @@ public:
 		// bloom filter will be queued for writing in ParquetWriter::BufferBloomFilter one level up
 	}
 
-	// TODO this now vastly over-estimates the page size
 	idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state_p) const override {
-
+		auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
+		if (state.encoding == Encoding::RLE_DICTIONARY) {
+			return (state.key_bit_width + 7) / 8;
+		} else {
+			return OP::template GetRowSize<SRC, TGT>(vector, index);
+		}
 	}
 };
 
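The `key_bit_width` and `GetRowSize` hunks above work together: when the dictionary survives analysis, the writer records how many bits one dictionary key needs, and the per-row size estimate under `RLE_DICTIONARY` becomes that bit width rounded up to whole bytes, replacing the TODO'd overestimate. A standalone sketch of both computations; `ComputeBitWidth` below is an illustrative stand-in for `RleBpDecoder::ComputeBitWidth`, not DuckDB's implementation:

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative stand-in: the smallest bit width that can represent
// dictionary keys 0..(n - 1).
static uint32_t ComputeBitWidth(uint64_t n) {
	uint32_t width = 0;
	while ((1ULL << width) < n) {
		width++;
	}
	return width;
}

int main() {
	uint64_t dictionary_size = 1000;
	uint32_t key_bit_width = ComputeBitWidth(dictionary_size); // 10 bits for 1000 keys
	uint64_t bytes_per_row = (key_bit_width + 7) / 8;          // round up to whole bytes -> 2
	std::printf("%u bits -> %llu byte(s) per row\n", key_bit_width,
	            static_cast<unsigned long long>(bytes_per_row));
	return 0;
}
```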
@@ -1570,7 +1591,7 @@ public:
 		}
 	}
 
-	unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
+	unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state, idx_t page_idx) override {
 		return make_uniq<BooleanWriterPageState>();
 	}
 
@@ -1812,7 +1833,7 @@ public:
 		}
 	}
 
-	unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
+	unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state, idx_t page_idx) override {
 		return make_uniq<EnumWriterPageState>(bit_width);
 	}
 
package/src/duckdb/extension/parquet/include/column_reader.hpp

@@ -160,7 +160,6 @@ protected:
 
 private:
 	void AllocateBlock(idx_t size);
-	void AllocateCompressed(idx_t size);
 	void PrepareRead(parquet_filter_t &filter);
 	void PreparePage(PageHeader &page_hdr);
 	void PrepareDataPage(PageHeader &page_hdr);
@@ -178,7 +177,6 @@ private:
 
 	shared_ptr<ResizeableBuffer> block;
 
-	ResizeableBuffer compressed_buffer;
 	ResizeableBuffer offset_buffer;
 
 	unique_ptr<RleBpDecoder> dict_decoder;
package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp

@@ -33,9 +33,8 @@ public:
 	}
 
 	void FinishWrite(WriteStream &writer) {
-		D_ASSERT(stream->GetPosition() == total_string_size);
 		dbp_encoder.FinishWrite(writer);
-		writer.WriteData(buffer.get(),
+		writer.WriteData(buffer.get(), stream->GetPosition());
 	}
 
 private:
package/src/duckdb/src/catalog/catalog.cpp

@@ -769,6 +769,12 @@ CatalogEntryLookup Catalog::TryLookupEntry(CatalogEntryRetriever &retriever, Cat
 
 	if (if_not_found == OnEntryNotFound::RETURN_NULL) {
 		return {nullptr, nullptr, ErrorData()};
+	}
+	// Check if the default database is actually attached. CreateMissingEntryException will throw binder exception
+	// otherwise.
+	if (!GetCatalogEntry(context, GetDefaultCatalog(retriever))) {
+		auto except = CatalogException("%s with name %s does not exist!", CatalogTypeToString(type), name);
+		return {nullptr, nullptr, ErrorData(except)};
 	} else {
 		auto except = CreateMissingEntryException(retriever, name, type, schemas, error_context);
 		return {nullptr, nullptr, ErrorData(except)};
@@ -805,6 +811,12 @@ CatalogEntryLookup Catalog::TryLookupEntry(CatalogEntryRetriever &retriever, vec
 
 	if (if_not_found == OnEntryNotFound::RETURN_NULL) {
 		return {nullptr, nullptr, ErrorData()};
+	}
+	// Check if the default database is actually attached. CreateMissingEntryException will throw binder exception
+	// otherwise.
+	if (!GetCatalogEntry(context, GetDefaultCatalog(retriever))) {
+		auto except = CatalogException("%s with name %s does not exist!", CatalogTypeToString(type), name);
+		return {nullptr, nullptr, ErrorData(except)};
 	} else {
 		auto except = CreateMissingEntryException(retriever, name, type, schemas, error_context);
 		return {nullptr, nullptr, ErrorData(except)};
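Both `TryLookupEntry` overloads now check whether the default catalog is attached at all before delegating to `CreateMissingEntryException`, so the failure surfaces as a plain `CatalogException` rather than a misleading binder exception. A minimal sketch of driving that lookup path through the public C++ API (links against the DuckDB library; the exact message depends on catalog state):

```cpp
#include "duckdb.hpp"

#include <cstdio>

int main() {
	duckdb::DuckDB db(nullptr); // in-memory database
	duckdb::Connection con(db);

	// A lookup for a missing entry goes through Catalog::TryLookupEntry; with
	// OnEntryNotFound::THROW_EXCEPTION the error constructed there surfaces here.
	auto result = con.Query("SELECT * FROM table_that_does_not_exist");
	if (result->HasError()) {
		std::printf("%s\n", result->GetError().c_str());
	}
	return 0;
}
```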
package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp

@@ -863,7 +863,7 @@ unique_ptr<CatalogEntry> DuckTableEntry::Copy(ClientContext &context) const {
 	}
 
 	auto binder = Binder::CreateBinder(context);
-	auto bound_create_info = binder->
+	auto bound_create_info = binder->BindCreateTableCheckpoint(std::move(create_info), schema);
 	return make_uniq<DuckTableEntry>(catalog, schema, *bound_create_info, storage);
 }
 
package/src/duckdb/src/catalog/catalog_entry_retriever.cpp

@@ -76,7 +76,7 @@ void CatalogEntryRetriever::Inherit(const CatalogEntryRetriever &parent) {
 	this->search_path = parent.search_path;
 }
 
-CatalogSearchPath &CatalogEntryRetriever::GetSearchPath() {
+const CatalogSearchPath &CatalogEntryRetriever::GetSearchPath() const {
 	if (search_path) {
 		return *search_path;
 	}
package/src/duckdb/src/catalog/catalog_search_path.cpp

@@ -189,11 +189,11 @@ void CatalogSearchPath::Set(CatalogSearchEntry new_value, CatalogSetPathType set
 	Set(std::move(new_paths), set_type);
 }
 
-const vector<CatalogSearchEntry> &CatalogSearchPath::Get() {
+const vector<CatalogSearchEntry> &CatalogSearchPath::Get() const {
 	return paths;
 }
 
-string CatalogSearchPath::GetDefaultSchema(const string &catalog) {
+string CatalogSearchPath::GetDefaultSchema(const string &catalog) const {
 	for (auto &path : paths) {
 		if (path.catalog == TEMP_CATALOG) {
 			continue;
@@ -205,7 +205,7 @@ string CatalogSearchPath::GetDefaultSchema(const string &catalog) {
 	return DEFAULT_SCHEMA;
 }
 
-string CatalogSearchPath::GetDefaultSchema(ClientContext &context, const string &catalog) {
+string CatalogSearchPath::GetDefaultSchema(ClientContext &context, const string &catalog) const {
 	for (auto &path : paths) {
 		if (path.catalog == TEMP_CATALOG) {
 			continue;
@@ -221,7 +221,7 @@ string CatalogSearchPath::GetDefaultSchema(ClientContext &context, const string
 	return DEFAULT_SCHEMA;
 }
 
-string CatalogSearchPath::GetDefaultCatalog(const string &schema) {
+string CatalogSearchPath::GetDefaultCatalog(const string &schema) const {
 	if (DefaultSchemaGenerator::IsDefaultSchema(schema)) {
 		return SYSTEM_CATALOG;
 	}
@@ -236,7 +236,7 @@ string CatalogSearchPath::GetDefaultCatalog(const string &schema) {
 	return INVALID_CATALOG;
 }
 
-vector<string> CatalogSearchPath::GetCatalogsForSchema(const string &schema) {
+vector<string> CatalogSearchPath::GetCatalogsForSchema(const string &schema) const {
 	vector<string> catalogs;
 	if (DefaultSchemaGenerator::IsDefaultSchema(schema)) {
 		catalogs.push_back(SYSTEM_CATALOG);
@@ -250,7 +250,7 @@ vector<string> CatalogSearchPath::GetCatalogsForSchema(const string &schema) {
 	return catalogs;
 }
 
-vector<string> CatalogSearchPath::GetSchemasForCatalog(const string &catalog) {
+vector<string> CatalogSearchPath::GetSchemasForCatalog(const string &catalog) const {
 	vector<string> schemas;
 	for (auto &path : paths) {
 		if (StringUtil::CIEquals(path.catalog, catalog)) {
@@ -260,7 +260,7 @@ vector<string> CatalogSearchPath::GetSchemasForCatalog(const string &catalog) {
 	return schemas;
 }
 
-const CatalogSearchEntry &CatalogSearchPath::GetDefault() {
+const CatalogSearchEntry &CatalogSearchPath::GetDefault() const {
 	const auto &paths = Get();
 	D_ASSERT(paths.size() >= 2);
 	return paths[1];
@@ -281,7 +281,7 @@ void CatalogSearchPath::SetPathsInternal(vector<CatalogSearchEntry> new_paths) {
 }
 
 bool CatalogSearchPath::SchemaInSearchPath(ClientContext &context, const string &catalog_name,
-                                           const string &schema_name) {
+                                           const string &schema_name) const {
 	for (auto &path : paths) {
 		if (!StringUtil::CIEquals(path.schema, schema_name)) {
 			continue;
package/src/duckdb/src/common/bind_helpers.cpp

@@ -56,6 +56,9 @@ vector<bool> ParseColumnList(const Value &value, vector<string> &names, const st
 		}
 		throw BinderException("\"%s\" expects a column list or * as parameter", loption);
 	}
+	if (value.IsNull()) {
+		throw BinderException("\"%s\" expects a column list or * as parameter, it can't be a NULL value", loption);
+	}
 	auto &children = ListValue::GetChildren(value);
 	// accept '*' as single argument
 	if (children.size() == 1 && children[0].type().id() == LogicalTypeId::VARCHAR &&
package/src/duckdb/src/common/compressed_file_system.cpp

@@ -31,6 +31,8 @@ void CompressedFile::Initialize(bool write) {
 	stream_data.out_buff_start = stream_data.out_buff.get();
 	stream_data.out_buff_end = stream_data.out_buff.get();
 
+	current_position = 0;
+
 	stream_wrapper = compressed_fs.CreateStream();
 	stream_wrapper->Initialize(*this, write);
 }
package/src/duckdb/src/common/hive_partitioning.cpp

@@ -245,7 +245,7 @@ static void TemplatedGetHivePartitionValues(Vector &input, vector<HivePartitionK
 
 	const auto &type = input.GetType();
 
-	const auto reinterpret = Value::CreateValue<T>(data[0]).GetTypeMutable() != type;
+	const auto reinterpret = Value::CreateValue<T>(data[sel.get_index(0)]).GetTypeMutable() != type;
 	if (reinterpret) {
 		for (idx_t i = 0; i < count; i++) {
 			auto &key = keys[i];
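The one-token change above matters because `data[0]` reads the first *physical* slot of the vector, while the input may be filtered through a selection vector; `sel.get_index(0)` translates the first *selected* row instead. A generic sketch of the difference, with a plain array standing in for DuckDB's `SelectionVector`:

```cpp
#include <cstdint>
#include <cstdio>

using idx_t = uint64_t;

int main() {
	// Physical data, plus a selection vector choosing rows 5, 2 and 7.
	int32_t data[] = {0, 10, 20, 30, 40, 50, 60, 70};
	idx_t sel[] = {5, 2, 7};

	// Wrong: peeks at physical slot 0, which is not part of the selection.
	std::printf("data[0]      = %d\n", data[0]); // 0
	// Right: translate the logical row through the selection vector first.
	std::printf("data[sel[0]] = %d\n", data[sel[0]]); // 50
	return 0;
}
```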
package/src/duckdb/src/common/multi_file_reader.cpp

@@ -508,14 +508,14 @@ void MultiFileReader::CreateMapping(const string &file_name,
 	// copy global columns and inject any different defaults
 	CreateColumnMapping(file_name, local_columns, global_columns, global_column_ids, reader_data, bind_data,
 	                    initial_file, global_state);
-	CreateFilterMap(
+	CreateFilterMap(global_column_ids, filters, reader_data, global_state);
 }
 
-void MultiFileReader::CreateFilterMap(const vector<
+void MultiFileReader::CreateFilterMap(const vector<ColumnIndex> &global_column_ids,
                                       optional_ptr<TableFilterSet> filters, MultiFileReaderData &reader_data,
                                       optional_ptr<MultiFileReaderGlobalState> global_state) {
 	if (filters) {
-		auto filter_map_size =
+		auto filter_map_size = global_column_ids.size();
 		if (global_state) {
 			filter_map_size += global_state->extra_columns.size();
 		}
package/src/duckdb/src/execution/aggregate_hashtable.cpp

@@ -329,7 +329,7 @@ optional_idx GroupedAggregateHashTable::TryAddDictionaryGroups(DataChunk &groups
 	if (dictionary_id.empty()) {
 		// dictionary has no id, we can't cache across vectors
 		// only use dictionary compression if there are fewer entries than groups
-		if (dict_size >= groups.size()
+		if (dict_size * DICTIONARY_THRESHOLD >= groups.size()) {
 			// dictionary is too large - use regular aggregation
 			return optional_idx();
 		}
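The new condition scales the dictionary size by `DICTIONARY_THRESHOLD` before comparing it to the group count, so dictionary-aware aggregation only kicks in when the dictionary is several times smaller than the chunk. Worked numbers below, with the threshold value assumed purely for illustration (the real constant is defined on `GroupedAggregateHashTable`):

```cpp
#include <cstdint>
#include <cstdio>

using idx_t = uint64_t;

int main() {
	constexpr idx_t DICTIONARY_THRESHOLD = 2; // assumed value, for illustration only
	const idx_t group_count = 2048;           // rows in the incoming chunk

	for (idx_t dict_size : {100ULL, 1024ULL, 1500ULL}) {
		// Mirrors the check above: a "large" dictionary falls back to regular aggregation.
		bool too_large = dict_size * DICTIONARY_THRESHOLD >= group_count;
		std::printf("dict_size=%4llu -> %s\n", static_cast<unsigned long long>(dict_size),
		            too_large ? "regular aggregation" : "dictionary aggregation");
	}
	return 0;
}
```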
package/src/duckdb/src/execution/index/art/art.cpp

@@ -1038,9 +1038,11 @@ string ART::GenerateConstraintErrorMessage(VerifyExistenceType verify_type, cons
 	}
 	case VerifyExistenceType::DELETE_FK: {
 		// DELETE_FK that still exists in a FK table, i.e., not a valid delete.
-		return StringUtil::Format(
-
-
+		return StringUtil::Format(
+		    "Violates foreign key constraint because key \"%s\" is still referenced by a foreign "
+		    "key in a different table. If this is an unexpected constraint violation, please refer to our "
+		    "foreign key limitations in the documentation",
+		    key_name);
 	}
 	default:
 		throw NotImplementedException("Type not implemented for VerifyExistenceType");
@@ -1091,16 +1093,27 @@ void ART::VerifyLeaf(const Node &leaf, const ARTKey &key, optional_ptr<ART> dele
 		return;
 	}
 
+	// Fast path for FOREIGN KEY constraints.
+	// Up to here, the above code paths work implicitly for FKs, as the leaf is inlined.
 	// FIXME: proper foreign key + delete ART support.
-
-
+	if (index_constraint_type == IndexConstraintType::FOREIGN) {
+		D_ASSERT(!deleted_leaf);
+		// We don't handle FK conflicts in UPSERT, so the row ID should not matter.
+		if (manager.AddHit(i, MAX_ROW_ID)) {
+			conflict_idx = i;
+		}
+		return;
+	}
 
 	// Scan the two row IDs in the leaf.
 	Iterator it(*this);
 	it.FindMinimum(leaf);
 	ARTKey empty_key = ARTKey();
 	unsafe_vector<row_t> row_ids;
-	it.Scan(empty_key, 2, row_ids, false);
+	auto success = it.Scan(empty_key, 2, row_ids, false);
+	if (!success || row_ids.size() != 2) {
+		throw InternalException("VerifyLeaf expects exactly two row IDs to be scanned");
+	}
 
 	if (!deleted_leaf) {
 		if (manager.AddHit(i, row_ids[0]) || manager.AddHit(i, row_ids[1])) {
package/src/duckdb/src/execution/index/art/iterator.cpp

@@ -46,9 +46,11 @@ bool Iterator::Scan(const ARTKey &upper_bound, const idx_t max_count, unsafe_vec
 	bool has_next;
 	do {
 		// An empty upper bound indicates that no upper bound exists.
-		if (!upper_bound.Empty()
-			if (
-
+		if (!upper_bound.Empty()) {
+			if (status == GateStatus::GATE_NOT_SET || entered_nested_leaf) {
+				if (current_key.GreaterThan(upper_bound, equal, nested_depth)) {
+					return true;
+				}
 			}
 		}
 
@@ -86,6 +88,7 @@ bool Iterator::Scan(const ARTKey &upper_bound, const idx_t max_count, unsafe_vec
 			throw InternalException("Invalid leaf type for index scan.");
 		}
 
+		entered_nested_leaf = false;
 		has_next = Next();
 	} while (has_next);
 	return true;
@@ -104,6 +107,7 @@ void Iterator::FindMinimum(const Node &node) {
 	if (node.GetGateStatus() == GateStatus::GATE_SET) {
 		D_ASSERT(status == GateStatus::GATE_NOT_SET);
 		status = GateStatus::GATE_SET;
+		entered_nested_leaf = true;
 		nested_depth = 0;
 	}
 
package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp

@@ -575,6 +575,11 @@ public:
 
 	explicit WindowLocalSourceState(WindowGlobalSourceState &gsource);
 
+	void ReleaseLocalStates() {
+		auto &local_states = window_hash_group->thread_states.at(task->thread_idx);
+		local_states.clear();
+	}
+
 	//! Does the task have more work to do?
 	bool TaskFinished() const {
 		return !task || task->begin_idx == task->end_idx;
@@ -792,6 +797,12 @@ void WindowGlobalSourceState::FinishTask(TaskPtr task) {
 }
 
 bool WindowLocalSourceState::TryAssignTask() {
+	D_ASSERT(TaskFinished());
+	if (task && task->stage == WindowGroupStage::GETDATA) {
+		// If this state completed the last block in the previous iteration,
+		// release out local state memory.
+		ReleaseLocalStates();
+	}
 	// Because downstream operators may be using our internal buffers,
 	// we can't "finish" a task until we are about to get the next one.
 
@@ -888,10 +899,6 @@ void WindowLocalSourceState::GetData(DataChunk &result) {
 		++task->begin_idx;
 	}
 
-	// If that was the last block, release out local state memory.
-	if (TaskFinished()) {
-		local_states.clear();
-	}
 	result.Verify();
 }
 
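The three hunks above move the local-state cleanup from the end of `GetData` into `TryAssignTask`: because downstream operators may still be reading buffers produced by the just-finished task, the memory can only be released once the worker is about to pick up its next task. A small generic sketch of that deferred-release pattern (the types here are stand-ins, not DuckDB's):

```cpp
#include <optional>
#include <vector>

struct Task {
	bool done = false;
};

struct Worker {
	std::vector<std::vector<char>> local_buffers; // scratch memory for the current task
	std::optional<Task> task;

	// Release scratch memory only when the *next* task is assigned: consumers may
	// still hold pointers into buffers produced by the previous task until then.
	void TryAssignTask(Task next) {
		if (task && task->done) {
			local_buffers.clear();
		}
		task = next;
	}
};

int main() {
	Worker w;
	w.TryAssignTask(Task{});
	return 0;
}
```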
package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp

@@ -4,7 +4,7 @@
 namespace duckdb {
 
 CSVBuffer::CSVBuffer(ClientContext &context, idx_t buffer_size_p, CSVFileHandle &file_handle,
-                     idx_t &global_csv_current_position, idx_t file_number_p)
+                     const idx_t &global_csv_current_position, idx_t file_number_p)
     : context(context), requested_size(buffer_size_p), file_number(file_number_p), can_seek(file_handle.CanSeek()),
       is_pipe(file_handle.IsPipe()) {
 	AllocateBuffer(buffer_size_p);
@@ -34,7 +34,7 @@ CSVBuffer::CSVBuffer(CSVFileHandle &file_handle, ClientContext &context, idx_t b
 }
 
 shared_ptr<CSVBuffer> CSVBuffer::Next(CSVFileHandle &file_handle, idx_t buffer_size, idx_t file_number_p,
-                                      bool &has_seaked) {
+                                      bool &has_seaked) const {
 	if (has_seaked) {
 		// This means that at some point a reload was done, and we are currently on the incorrect position in our file
 		// handle
package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp

@@ -36,7 +36,7 @@ void CSVEncoderBuffer::Reset() {
 	actual_encoded_buffer_size = 0;
 }
 
-CSVEncoder::CSVEncoder(DBConfig &config, const string &encoding_name_to_find, idx_t buffer_size) {
+CSVEncoder::CSVEncoder(const DBConfig &config, const string &encoding_name_to_find, idx_t buffer_size) {
 	encoding_name = StringUtil::Lower(encoding_name_to_find);
 	auto function = config.GetEncodeFunction(encoding_name_to_find);
 	if (!function) {
@@ -51,6 +51,10 @@ CSVEncoder::CSVEncoder(DBConfig &config, const string &encoding_name_to_find, id
 	}
 	// We ensure that the encoded buffer size is an even number to make the two byte lookup on utf-16 work
 	idx_t encoded_buffer_size = buffer_size % 2 != 0 ? buffer_size - 1 : buffer_size;
+	if (encoded_buffer_size == 0) {
+		// This might happen if buffer size = 1
+		encoded_buffer_size = 2;
+	}
 	D_ASSERT(encoded_buffer_size > 0);
 	encoded_buffer.Initialize(encoded_buffer_size);
 	remaining_bytes_buffer.Initialize(function->GetBytesPerIteration());
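The guard added above covers a corner case of the even-size rule: the encoded buffer must hold an even number of bytes so two-byte UTF-16 code units never straddle a refill boundary, but rounding an odd `buffer_size` down turns 1 into 0, hence the clamp to 2. A sketch of the sizing rule:

```cpp
#include <cstdint>
#include <cstdio>

using idx_t = uint64_t;

// Sketch of the sizing rule: force an even capacity for the two-byte UTF-16
// lookup, and clamp to 2 so buffer_size == 1 doesn't round down to zero.
static idx_t EncodedBufferSize(idx_t buffer_size) {
	idx_t encoded = (buffer_size % 2 != 0) ? buffer_size - 1 : buffer_size;
	if (encoded == 0) {
		encoded = 2;
	}
	return encoded;
}

int main() {
	std::printf("%llu %llu %llu\n",
	            static_cast<unsigned long long>(EncodedBufferSize(1)),  // 2
	            static_cast<unsigned long long>(EncodedBufferSize(7)),  // 6
	            static_cast<unsigned long long>(EncodedBufferSize(8))); // 8
	return 0;
}
```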
package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp

@@ -11,9 +11,10 @@ ScannerResult::ScannerResult(CSVStates &states_p, CSVStateMachine &state_machine
 
 BaseScanner::BaseScanner(shared_ptr<CSVBufferManager> buffer_manager_p, shared_ptr<CSVStateMachine> state_machine_p,
                          shared_ptr<CSVErrorHandler> error_handler_p, bool sniffing_p,
-                         shared_ptr<CSVFileScan> csv_file_scan_p, CSVIterator iterator_p)
+                         shared_ptr<CSVFileScan> csv_file_scan_p, const CSVIterator &iterator_p)
     : csv_file_scan(std::move(csv_file_scan_p)), sniffing(sniffing_p), error_handler(std::move(error_handler_p)),
-      state_machine(std::move(state_machine_p)), buffer_manager(std::move(buffer_manager_p)),
+      state_machine(std::move(state_machine_p)), states(), buffer_manager(std::move(buffer_manager_p)),
+      iterator(iterator_p) {
 	D_ASSERT(buffer_manager);
 	D_ASSERT(state_machine);
 	// Initialize current buffer handle
package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp

@@ -76,8 +76,8 @@ void CSVSchema::MergeSchemas(CSVSchema &other, bool null_padding) {
 	}
 }
 
-CSVSchema::CSVSchema(vector<string> &names, vector<LogicalType> &types, const string &file_path,
-                     const bool empty_p)
+CSVSchema::CSVSchema(const vector<string> &names, const vector<LogicalType> &types, const string &file_path,
+                     idx_t rows_read_p, const bool empty_p)
     : rows_read(rows_read_p), empty(empty_p) {
 	Initialize(names, types, file_path);
 }
package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp

@@ -13,7 +13,7 @@ CSVBoundary::CSVBoundary(idx_t buffer_idx_p, idx_t buffer_pos_p, idx_t boundary_
 CSVBoundary::CSVBoundary() : buffer_idx(0), buffer_pos(0), boundary_idx(0), end_pos(NumericLimits<idx_t>::Maximum()) {
 }
 
-CSVIterator::CSVIterator() : is_set(false) {
+CSVIterator::CSVIterator() : buffer_size(0), is_set(false) {
 }
 
 void CSVBoundary::Print() const {
package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp

@@ -688,23 +688,29 @@ bool LineError::HandleErrors(StringValueResult &result) {
 			    line_pos.GetGlobalPosition(result.requested_size), result.path);
 		}
 		break;
-	case CAST_ERROR:
+	case CAST_ERROR: {
+		string column_name;
+		LogicalTypeId type_id;
+		if (cur_error.col_idx < result.names.size()) {
+			column_name = result.names[cur_error.col_idx];
+		}
+		if (cur_error.col_idx < result.number_of_columns) {
+			type_id = result.parse_types[cur_error.chunk_idx].type_id;
+		}
 		if (result.current_line_position.begin == line_pos) {
 			csv_error = CSVError::CastError(
-			    result.state_machine.options,
-
+			    result.state_machine.options, column_name, cur_error.error_message, cur_error.col_idx, borked_line,
+			    lines_per_batch,
 			    result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
-			    line_pos.GetGlobalPosition(result.requested_size, first_nl),
-			    result.parse_types[cur_error.chunk_idx].type_id, result.path);
+			    line_pos.GetGlobalPosition(result.requested_size, first_nl), type_id, result.path);
 		} else {
 			csv_error = CSVError::CastError(
-			    result.state_machine.options,
-
+			    result.state_machine.options, column_name, cur_error.error_message, cur_error.col_idx, borked_line,
+			    lines_per_batch,
 			    result.current_line_position.begin.GetGlobalPosition(result.requested_size, first_nl),
-			    line_pos.GetGlobalPosition(result.requested_size), result.
-			    result.path);
+			    line_pos.GetGlobalPosition(result.requested_size), type_id, result.path);
 		}
-
+	} break;
 	case MAXIMUM_LINE_SIZE:
 		csv_error = CSVError::LineSizeError(
 		    result.state_machine.options, lines_per_batch, borked_line,
|
|
964
970
|
result(states, *state_machine, cur_buffer_handle, BufferAllocator::Get(buffer_manager->context), result_size,
|
965
971
|
iterator.pos.buffer_pos, *error_handler, iterator,
|
966
972
|
buffer_manager->context.client_data->debug_set_max_line_length, csv_file_scan, lines_read, sniffing,
|
967
|
-
buffer_manager->GetFilePath(), scanner_idx_p)
|
973
|
+
buffer_manager->GetFilePath(), scanner_idx_p),
|
974
|
+
start_pos(0) {
|
968
975
|
iterator.buffer_size = state_machine->options.buffer_size_option.GetValue();
|
969
976
|
}
|
970
977
|
|
@@ -976,7 +983,8 @@ StringValueScanner::StringValueScanner(const shared_ptr<CSVBufferManager> &buffe
|
|
976
983
|
result(states, *state_machine, cur_buffer_handle, Allocator::DefaultAllocator(), result_size,
|
977
984
|
iterator.pos.buffer_pos, *error_handler, iterator,
|
978
985
|
buffer_manager->context.client_data->debug_set_max_line_length, csv_file_scan, lines_read, sniffing,
|
979
|
-
buffer_manager->GetFilePath(), 0)
|
986
|
+
buffer_manager->GetFilePath(), 0),
|
987
|
+
start_pos(0) {
|
980
988
|
iterator.buffer_size = state_machine->options.buffer_size_option.GetValue();
|
981
989
|
}
|
982
990
|
|