duckdb 1.1.0 → 1.1.1-dev3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -1
- package/package.json +1 -1
- package/src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp +1 -1
- package/src/duckdb/extension/json/include/json_common.hpp +14 -4
- package/src/duckdb/extension/json/include/json_executors.hpp +11 -3
- package/src/duckdb/extension/json/json_extension.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_extract.cpp +11 -3
- package/src/duckdb/extension/json/json_functions/json_value.cpp +4 -3
- package/src/duckdb/extension/json/json_functions.cpp +16 -7
- package/src/duckdb/extension/parquet/column_reader.cpp +3 -0
- package/src/duckdb/extension/parquet/column_writer.cpp +54 -43
- package/src/duckdb/extension/parquet/geo_parquet.cpp +19 -0
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +10 -6
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +3 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +2 -1
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +1 -0
- package/src/duckdb/src/common/arrow/arrow_util.cpp +60 -0
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +1 -53
- package/src/duckdb/src/common/cgroups.cpp +15 -24
- package/src/duckdb/src/common/constants.cpp +8 -0
- package/src/duckdb/src/common/enum_util.cpp +331 -326
- package/src/duckdb/src/common/http_util.cpp +5 -1
- package/src/duckdb/src/common/operator/cast_operators.cpp +6 -60
- package/src/duckdb/src/common/types/bit.cpp +1 -1
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +18 -1
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +2 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +5 -0
- package/src/duckdb/src/core_functions/aggregate/distributive/arg_min_max.cpp +1 -1
- package/src/duckdb/src/core_functions/aggregate/distributive/minmax.cpp +2 -1
- package/src/duckdb/src/execution/index/art/iterator.cpp +17 -15
- package/src/duckdb/src/execution/index/art/prefix.cpp +9 -34
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +4 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +1 -0
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/base_scanner.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +23 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +33 -4
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +23 -13
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +23 -19
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +12 -11
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -14
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +4 -4
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +3 -1
- package/src/duckdb/src/execution/operator/join/physical_piecewise_merge_join.cpp +6 -1
- package/src/duckdb/src/function/cast/decimal_cast.cpp +33 -3
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +9 -0
- package/src/duckdb/src/function/table/arrow.cpp +34 -22
- package/src/duckdb/src/function/table/sniff_csv.cpp +4 -1
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +31 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_wrapper.hpp +2 -16
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +60 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +0 -1
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +2 -1
- package/src/duckdb/src/include/duckdb/core_functions/aggregate/minmax_n_helpers.hpp +9 -5
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +5 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +5 -5
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_result_collector.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +11 -0
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/extension.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +14 -5
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/settings.hpp +4 -2
- package/src/duckdb/src/include/duckdb/parser/keyword_helper.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +7 -1
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/select_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -1
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +7 -4
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +4 -4
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -4
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +4 -2
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/transaction_manager.hpp +1 -1
- package/src/duckdb/src/include/duckdb.h +8 -8
- package/src/duckdb/src/main/appender.cpp +1 -1
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +3 -3
- package/src/duckdb/src/main/capi/helper-c.cpp +4 -0
- package/src/duckdb/src/main/config.cpp +24 -11
- package/src/duckdb/src/main/database.cpp +6 -5
- package/src/duckdb/src/main/extension/extension_install.cpp +13 -8
- package/src/duckdb/src/main/extension/extension_load.cpp +10 -4
- package/src/duckdb/src/main/extension.cpp +1 -1
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +10 -1
- package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +9 -5
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +14 -8
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +2 -0
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -0
- package/src/duckdb/src/optimizer/optimizer.cpp +4 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_cross_product.cpp +1 -11
- package/src/duckdb/src/optimizer/pushdown/pushdown_inner_join.cpp +1 -7
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_cast.cpp +3 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +1 -0
- package/src/duckdb/src/parser/keyword_helper.cpp +4 -0
- package/src/duckdb/src/parser/parser.cpp +20 -18
- package/src/duckdb/src/parser/transform/statement/transform_select_node.cpp +8 -3
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +3 -0
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +7 -1
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +13 -0
- package/src/duckdb/src/planner/binder/statement/bind_copy_database.cpp +7 -11
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +27 -10
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +24 -9
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +1 -3
- package/src/duckdb/src/planner/binder.cpp +5 -6
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +1 -0
- package/src/duckdb/src/planner/expression_binder/select_binder.cpp +9 -0
- package/src/duckdb/src/planner/operator/logical_copy_to_file.cpp +2 -2
- package/src/duckdb/src/planner/operator/logical_positional_join.cpp +1 -0
- package/src/duckdb/src/storage/buffer/block_handle.cpp +18 -21
- package/src/duckdb/src/storage/buffer/block_manager.cpp +12 -4
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +12 -2
- package/src/duckdb/src/storage/buffer_manager.cpp +3 -2
- package/src/duckdb/src/storage/compression/rle.cpp +5 -2
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +2 -1
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +8 -7
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +19 -20
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +1 -2
- package/src/duckdb/src/storage/table/column_data.cpp +5 -2
- package/src/duckdb/src/storage/table/column_segment.cpp +2 -2
- package/src/duckdb/src/storage/table/row_group_collection.cpp +18 -14
- package/src/duckdb/src/storage/table/standard_column_data.cpp +3 -3
- package/src/duckdb/src/storage/wal_replay.cpp +2 -3
- package/src/duckdb/third_party/libpg_query/include/common/keywords.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/include/parser/parser.hpp +1 -2
- package/src/duckdb/third_party/libpg_query/include/pg_simplified_token.hpp +6 -4
- package/src/duckdb/third_party/libpg_query/include/postgres_parser.hpp +1 -1
- package/src/duckdb/third_party/libpg_query/postgres_parser.cpp +1 -1
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +801 -799
- package/src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp +6 -2
- package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +0 -1
- package/src/duckdb/ub_src_common_arrow.cpp +2 -0
- package/vendor.py +1 -2
@@ -6,7 +6,11 @@
|
|
6
6
|
namespace duckdb {
|
7
7
|
|
8
8
|
void HTTPUtil::ParseHTTPProxyHost(string &proxy_value, string &hostname_out, idx_t &port_out, idx_t default_port) {
|
9
|
-
auto
|
9
|
+
auto sanitized_proxy_value = proxy_value;
|
10
|
+
if (StringUtil::StartsWith(proxy_value, "http://")) {
|
11
|
+
sanitized_proxy_value = proxy_value.substr(7);
|
12
|
+
}
|
13
|
+
auto proxy_split = StringUtil::Split(sanitized_proxy_value, ":");
|
10
14
|
if (proxy_split.size() == 1) {
|
11
15
|
hostname_out = proxy_split[0];
|
12
16
|
port_out = default_port;
|
@@ -920,68 +920,12 @@ bool TryCast::Operation(double input, double &result, bool strict) {
|
|
920
920
|
//===--------------------------------------------------------------------===//
|
921
921
|
// Cast String -> Numeric
|
922
922
|
//===--------------------------------------------------------------------===//
|
923
|
+
|
923
924
|
template <>
|
924
925
|
bool TryCast::Operation(string_t input, bool &result, bool strict) {
|
925
|
-
auto input_data = reinterpret_cast<const
|
926
|
+
auto input_data = reinterpret_cast<const char *>(input.GetData());
|
926
927
|
auto input_size = input.GetSize();
|
927
|
-
|
928
|
-
switch (input_size) {
|
929
|
-
case 1: {
|
930
|
-
unsigned char c = UnsafeNumericCast<uint8_t>(std::tolower(*input_data));
|
931
|
-
if (c == 't' || (!strict && c == 'y') || (!strict && c == '1')) {
|
932
|
-
result = true;
|
933
|
-
return true;
|
934
|
-
} else if (c == 'f' || (!strict && c == 'n') || (!strict && c == '0')) {
|
935
|
-
result = false;
|
936
|
-
return true;
|
937
|
-
}
|
938
|
-
return false;
|
939
|
-
}
|
940
|
-
case 2: {
|
941
|
-
unsigned char n = UnsafeNumericCast<uint8_t>(std::tolower(input_data[0]));
|
942
|
-
unsigned char o = UnsafeNumericCast<uint8_t>(std::tolower(input_data[1]));
|
943
|
-
if (n == 'n' && o == 'o') {
|
944
|
-
result = false;
|
945
|
-
return true;
|
946
|
-
}
|
947
|
-
return false;
|
948
|
-
}
|
949
|
-
case 3: {
|
950
|
-
unsigned char y = UnsafeNumericCast<uint8_t>(std::tolower(input_data[0]));
|
951
|
-
unsigned char e = UnsafeNumericCast<uint8_t>(std::tolower(input_data[1]));
|
952
|
-
unsigned char s = UnsafeNumericCast<uint8_t>(std::tolower(input_data[2]));
|
953
|
-
if (y == 'y' && e == 'e' && s == 's') {
|
954
|
-
result = true;
|
955
|
-
return true;
|
956
|
-
}
|
957
|
-
return false;
|
958
|
-
}
|
959
|
-
case 4: {
|
960
|
-
unsigned char t = UnsafeNumericCast<uint8_t>(std::tolower(input_data[0]));
|
961
|
-
unsigned char r = UnsafeNumericCast<uint8_t>(std::tolower(input_data[1]));
|
962
|
-
unsigned char u = UnsafeNumericCast<uint8_t>(std::tolower(input_data[2]));
|
963
|
-
unsigned char e = UnsafeNumericCast<uint8_t>(std::tolower(input_data[3]));
|
964
|
-
if (t == 't' && r == 'r' && u == 'u' && e == 'e') {
|
965
|
-
result = true;
|
966
|
-
return true;
|
967
|
-
}
|
968
|
-
return false;
|
969
|
-
}
|
970
|
-
case 5: {
|
971
|
-
unsigned char f = UnsafeNumericCast<uint8_t>(std::tolower(input_data[0]));
|
972
|
-
unsigned char a = UnsafeNumericCast<uint8_t>(std::tolower(input_data[1]));
|
973
|
-
unsigned char l = UnsafeNumericCast<uint8_t>(std::tolower(input_data[2]));
|
974
|
-
unsigned char s = UnsafeNumericCast<uint8_t>(std::tolower(input_data[3]));
|
975
|
-
unsigned char e = UnsafeNumericCast<uint8_t>(std::tolower(input_data[4]));
|
976
|
-
if (f == 'f' && a == 'a' && l == 'l' && s == 's' && e == 'e') {
|
977
|
-
result = false;
|
978
|
-
return true;
|
979
|
-
}
|
980
|
-
return false;
|
981
|
-
}
|
982
|
-
default:
|
983
|
-
return false;
|
984
|
-
}
|
928
|
+
return TryCastStringBool(input_data, input_size, result, strict);
|
985
929
|
}
|
986
930
|
template <>
|
987
931
|
bool TryCast::Operation(string_t input, int8_t &result, bool strict) {
|
@@ -2298,7 +2242,9 @@ bool TryCastToDecimal::Operation(uhugeint_t input, hugeint_t &result, CastParame
|
|
2298
2242
|
template <class SRC, class DST>
|
2299
2243
|
bool DoubleToDecimalCast(SRC input, DST &result, CastParameters ¶meters, uint8_t width, uint8_t scale) {
|
2300
2244
|
double value = input * NumericHelper::DOUBLE_POWERS_OF_TEN[scale];
|
2301
|
-
|
2245
|
+
double roundedValue = round(value);
|
2246
|
+
if (roundedValue <= -NumericHelper::DOUBLE_POWERS_OF_TEN[width] ||
|
2247
|
+
roundedValue >= NumericHelper::DOUBLE_POWERS_OF_TEN[width]) {
|
2302
2248
|
string error = StringUtil::Format("Could not cast value %f to DECIMAL(%d,%d)", value, width, scale);
|
2303
2249
|
HandleCastError::AssignError(error, parameters);
|
2304
2250
|
return false;
|
@@ -180,7 +180,7 @@ void Bit::BitToBlob(string_t bit, string_t &output_blob) {
|
|
180
180
|
idx_t size = output_blob.GetSize();
|
181
181
|
|
182
182
|
output[0] = UnsafeNumericCast<char>(GetFirstByte(bit));
|
183
|
-
if (size
|
183
|
+
if (size >= 2) {
|
184
184
|
++output;
|
185
185
|
// First byte in bitstring contains amount of padded bits,
|
186
186
|
// second byte in bitstring is the padded byte,
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
#include "duckdb/common/types/column/column_data_collection_segment.hpp"
|
4
4
|
#include "duckdb/storage/buffer/block_handle.hpp"
|
5
|
+
#include "duckdb/storage/buffer/buffer_pool.hpp"
|
5
6
|
#include "duckdb/storage/buffer_manager.hpp"
|
6
7
|
|
7
8
|
namespace duckdb {
|
@@ -45,6 +46,21 @@ ColumnDataAllocator::ColumnDataAllocator(ColumnDataAllocator &other) {
|
|
45
46
|
}
|
46
47
|
}
|
47
48
|
|
49
|
+
ColumnDataAllocator::~ColumnDataAllocator() {
|
50
|
+
if (type == ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR) {
|
51
|
+
return;
|
52
|
+
}
|
53
|
+
for (auto &block : blocks) {
|
54
|
+
block.handle->SetDestroyBufferUpon(DestroyBufferUpon::UNPIN);
|
55
|
+
}
|
56
|
+
const auto data_size = SizeInBytes();
|
57
|
+
blocks.clear();
|
58
|
+
if (Allocator::SupportsFlush() &&
|
59
|
+
data_size > alloc.buffer_manager->GetBufferPool().GetAllocatorBulkDeallocationFlushThreshold()) {
|
60
|
+
Allocator::FlushAll();
|
61
|
+
}
|
62
|
+
}
|
63
|
+
|
48
64
|
BufferHandle ColumnDataAllocator::Pin(uint32_t block_id) {
|
49
65
|
D_ASSERT(type == ColumnDataAllocatorType::BUFFER_MANAGER_ALLOCATOR || type == ColumnDataAllocatorType::HYBRID);
|
50
66
|
shared_ptr<BlockHandle> handle;
|
@@ -65,7 +81,8 @@ BufferHandle ColumnDataAllocator::AllocateBlock(idx_t size) {
|
|
65
81
|
BlockMetaData data;
|
66
82
|
data.size = 0;
|
67
83
|
data.capacity = NumericCast<uint32_t>(max_size);
|
68
|
-
auto pin = alloc.buffer_manager->Allocate(MemoryTag::COLUMN_DATA, max_size, false
|
84
|
+
auto pin = alloc.buffer_manager->Allocate(MemoryTag::COLUMN_DATA, max_size, false);
|
85
|
+
data.handle = pin.GetBlockHandle();
|
69
86
|
blocks.push_back(std::move(data));
|
70
87
|
allocated_size += max_size;
|
71
88
|
return pin;
|
@@ -11,7 +11,8 @@ namespace duckdb {
|
|
11
11
|
using ValidityBytes = TupleDataLayout::ValidityBytes;
|
12
12
|
|
13
13
|
TupleDataBlock::TupleDataBlock(BufferManager &buffer_manager, idx_t capacity_p) : capacity(capacity_p), size(0) {
|
14
|
-
buffer_manager.Allocate(MemoryTag::HASH_TABLE, capacity, false
|
14
|
+
auto buffer_handle = buffer_manager.Allocate(MemoryTag::HASH_TABLE, capacity, false);
|
15
|
+
handle = buffer_handle.GetBlockHandle();
|
15
16
|
}
|
16
17
|
|
17
18
|
TupleDataBlock::TupleDataBlock(TupleDataBlock &&other) noexcept : capacity(0), size(0) {
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "duckdb/common/types/row/tuple_data_segment.hpp"
|
2
2
|
|
3
3
|
#include "duckdb/common/types/row/tuple_data_allocator.hpp"
|
4
|
+
#include "duckdb/storage/buffer/buffer_pool.hpp"
|
4
5
|
|
5
6
|
namespace duckdb {
|
6
7
|
|
@@ -118,6 +119,10 @@ TupleDataSegment::~TupleDataSegment() {
|
|
118
119
|
}
|
119
120
|
pinned_row_handles.clear();
|
120
121
|
pinned_heap_handles.clear();
|
122
|
+
if (Allocator::SupportsFlush() && allocator &&
|
123
|
+
data_size > allocator->GetBufferManager().GetBufferPool().GetAllocatorBulkDeallocationFlushThreshold()) {
|
124
|
+
Allocator::FlushAll();
|
125
|
+
}
|
121
126
|
allocator.reset();
|
122
127
|
}
|
123
128
|
|
@@ -192,7 +192,7 @@ struct GenericArgMinMaxState {
|
|
192
192
|
|
193
193
|
static void PrepareData(Vector &by, idx_t count, Vector &extra_state, UnifiedVectorFormat &result) {
|
194
194
|
OrderModifiers modifiers(ORDER_TYPE, OrderByNullType::NULLS_LAST);
|
195
|
-
CreateSortKeyHelpers::
|
195
|
+
CreateSortKeyHelpers::CreateSortKeyWithValidity(by, extra_state, modifiers, count);
|
196
196
|
extra_state.ToUnifiedFormat(count, result);
|
197
197
|
}
|
198
198
|
};
|
@@ -4,6 +4,7 @@
|
|
4
4
|
#include "duckdb/common/vector_operations/vector_operations.hpp"
|
5
5
|
#include "duckdb/common/operator/comparison_operators.hpp"
|
6
6
|
#include "duckdb/common/types/null_value.hpp"
|
7
|
+
#include "duckdb/main/config.hpp"
|
7
8
|
#include "duckdb/planner/expression.hpp"
|
8
9
|
#include "duckdb/planner/expression/bound_comparison_expression.hpp"
|
9
10
|
#include "duckdb/planner/expression_binder.hpp"
|
@@ -330,7 +331,7 @@ unique_ptr<FunctionData> BindMinMax(ClientContext &context, AggregateFunction &f
|
|
330
331
|
vector<unique_ptr<Expression>> &arguments) {
|
331
332
|
if (arguments[0]->return_type.id() == LogicalTypeId::VARCHAR) {
|
332
333
|
auto str_collation = StringType::GetCollation(arguments[0]->return_type);
|
333
|
-
if (!str_collation.empty()) {
|
334
|
+
if (!str_collation.empty() || !DBConfig::GetConfig(context).options.collation.empty()) {
|
334
335
|
// If aggr function is min/max and uses collations, replace bound_function with arg_min/arg_max
|
335
336
|
// to make sure the result's correctness.
|
336
337
|
string function_name = function.name == "min" ? "arg_min" : "arg_max";
|
@@ -251,11 +251,7 @@ bool Iterator::Next() {
|
|
251
251
|
}
|
252
252
|
|
253
253
|
void Iterator::PopNode() {
|
254
|
-
|
255
|
-
if (nodes.top().node.GetGateStatus() == GateStatus::GATE_SET) {
|
256
|
-
D_ASSERT(status == GateStatus::GATE_SET);
|
257
|
-
status = GateStatus::GATE_NOT_SET;
|
258
|
-
}
|
254
|
+
auto gate_status = nodes.top().node.GetGateStatus();
|
259
255
|
|
260
256
|
// Pop the byte and the node.
|
261
257
|
if (nodes.top().node.GetType() != NType::PREFIX) {
|
@@ -264,19 +260,25 @@ void Iterator::PopNode() {
|
|
264
260
|
nested_depth--;
|
265
261
|
D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE);
|
266
262
|
}
|
267
|
-
nodes.pop();
|
268
|
-
return;
|
269
|
-
}
|
270
263
|
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
264
|
+
} else {
|
265
|
+
// Pop all prefix bytes and the node.
|
266
|
+
Prefix prefix(art, nodes.top().node);
|
267
|
+
auto prefix_byte_count = prefix.data[Prefix::Count(art)];
|
268
|
+
current_key.Pop(prefix_byte_count);
|
269
|
+
|
270
|
+
if (status == GateStatus::GATE_SET) {
|
271
|
+
nested_depth -= prefix_byte_count;
|
272
|
+
D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE);
|
273
|
+
}
|
278
274
|
}
|
279
275
|
nodes.pop();
|
276
|
+
|
277
|
+
// We are popping a gate node.
|
278
|
+
if (gate_status == GateStatus::GATE_SET) {
|
279
|
+
D_ASSERT(status == GateStatus::GATE_SET);
|
280
|
+
status = GateStatus::GATE_NOT_SET;
|
281
|
+
}
|
280
282
|
}
|
281
283
|
|
282
284
|
} // namespace duckdb
|
@@ -400,42 +400,15 @@ void Prefix::TransformToDeprecated(ART &art, Node &node, unsafe_unique_ptr<Fixed
|
|
400
400
|
return Node::TransformToDeprecated(art, ref, allocator);
|
401
401
|
}
|
402
402
|
|
403
|
-
//
|
404
|
-
if (art.prefix_count <= DEPRECATED_COUNT) {
|
405
|
-
reference<Node> ref(node);
|
406
|
-
while (ref.get().GetType() == PREFIX && ref.get().GetGateStatus() == GateStatus::GATE_NOT_SET) {
|
407
|
-
Prefix prefix(art, ref, true, true);
|
408
|
-
if (!prefix.in_memory) {
|
409
|
-
return;
|
410
|
-
}
|
411
|
-
|
412
|
-
Node new_node;
|
413
|
-
new_node = allocator->New();
|
414
|
-
new_node.SetMetadata(static_cast<uint8_t>(PREFIX));
|
415
|
-
|
416
|
-
Prefix new_prefix(allocator, new_node, DEPRECATED_COUNT);
|
417
|
-
new_prefix.data[DEPRECATED_COUNT] = prefix.data[Count(art)];
|
418
|
-
memcpy(new_prefix.data, prefix.data, new_prefix.data[DEPRECATED_COUNT]);
|
419
|
-
*new_prefix.ptr = *prefix.ptr;
|
420
|
-
|
421
|
-
prefix.ptr->Clear();
|
422
|
-
Node::Free(art, ref);
|
423
|
-
ref.get() = new_node;
|
424
|
-
ref = *new_prefix.ptr;
|
425
|
-
}
|
426
|
-
|
427
|
-
return Node::TransformToDeprecated(art, ref, allocator);
|
428
|
-
}
|
429
|
-
|
430
|
-
// Else, we need to create a new prefix chain.
|
403
|
+
// We need to create a new prefix (chain).
|
431
404
|
Node new_node;
|
432
405
|
new_node = allocator->New();
|
433
406
|
new_node.SetMetadata(static_cast<uint8_t>(PREFIX));
|
434
407
|
Prefix new_prefix(allocator, new_node, DEPRECATED_COUNT);
|
435
408
|
|
436
|
-
|
437
|
-
while (
|
438
|
-
Prefix prefix(art,
|
409
|
+
Node current_node = node;
|
410
|
+
while (current_node.GetType() == PREFIX && current_node.GetGateStatus() == GateStatus::GATE_NOT_SET) {
|
411
|
+
Prefix prefix(art, current_node, true, true);
|
439
412
|
if (!prefix.in_memory) {
|
440
413
|
return;
|
441
414
|
}
|
@@ -445,11 +418,13 @@ void Prefix::TransformToDeprecated(ART &art, Node &node, unsafe_unique_ptr<Fixed
|
|
445
418
|
}
|
446
419
|
|
447
420
|
*new_prefix.ptr = *prefix.ptr;
|
448
|
-
|
449
|
-
|
421
|
+
prefix.ptr->Clear();
|
422
|
+
Node::Free(art, current_node);
|
423
|
+
current_node = *new_prefix.ptr;
|
450
424
|
}
|
451
425
|
|
452
|
-
|
426
|
+
node = new_node;
|
427
|
+
return Node::TransformToDeprecated(art, *new_prefix.ptr, allocator);
|
453
428
|
}
|
454
429
|
|
455
430
|
Prefix Prefix::Append(ART &art, const uint8_t byte) {
|
@@ -40,7 +40,8 @@ FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager)
|
|
40
40
|
block_handle(nullptr) {
|
41
41
|
|
42
42
|
auto &buffer_manager = block_manager.buffer_manager;
|
43
|
-
buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false
|
43
|
+
buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false);
|
44
|
+
block_handle = buffer_handle.GetBlockHandle();
|
44
45
|
}
|
45
46
|
|
46
47
|
FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const idx_t allocation_size,
|
@@ -137,8 +138,8 @@ void FixedSizeBuffer::Pin() {
|
|
137
138
|
|
138
139
|
// Copy the (partial) data into a new (not yet disk-backed) buffer handle.
|
139
140
|
shared_ptr<BlockHandle> new_block_handle;
|
140
|
-
auto new_buffer_handle =
|
141
|
-
|
141
|
+
auto new_buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false);
|
142
|
+
new_block_handle = new_buffer_handle.GetBlockHandle();
|
142
143
|
memcpy(new_buffer_handle.Ptr(), buffer_handle.Ptr() + block_pointer.offset, allocation_size);
|
143
144
|
|
144
145
|
buffer_handle = std::move(new_buffer_handle);
|
@@ -342,6 +342,7 @@ void LocalUngroupedAggregateState::Sink(DataChunk &payload_chunk, idx_t payload_
|
|
342
342
|
#endif
|
343
343
|
auto &aggregate = state.aggregate_expressions[aggr_idx]->Cast<BoundAggregateExpression>();
|
344
344
|
idx_t payload_cnt = aggregate.children.size();
|
345
|
+
D_ASSERT(payload_idx + payload_cnt <= payload_chunk.data.size());
|
345
346
|
auto start_of_input = payload_cnt == 0 ? nullptr : &payload_chunk.data[payload_idx];
|
346
347
|
AggregateInputData aggr_input_data(state.bind_data[aggr_idx], allocator);
|
347
348
|
aggregate.function.simple_update(start_of_input, aggr_input_data, payload_cnt, state.aggregate_data[aggr_idx].get(),
|
@@ -54,7 +54,8 @@ void CSVBuffer::AllocateBuffer(idx_t buffer_size) {
|
|
54
54
|
auto &buffer_manager = BufferManager::GetBufferManager(context);
|
55
55
|
bool can_destroy = !is_pipe;
|
56
56
|
handle = buffer_manager.Allocate(MemoryTag::CSV_READER, MaxValue<idx_t>(buffer_manager.GetBlockSize(), buffer_size),
|
57
|
-
can_destroy
|
57
|
+
can_destroy);
|
58
|
+
block = handle.GetBlockHandle();
|
58
59
|
}
|
59
60
|
|
60
61
|
idx_t CSVBuffer::GetBufferSize() {
|
@@ -25,7 +25,7 @@ BaseScanner::BaseScanner(shared_ptr<CSVBufferManager> buffer_manager_p, shared_p
|
|
25
25
|
}
|
26
26
|
}
|
27
27
|
|
28
|
-
bool BaseScanner::FinishedFile() {
|
28
|
+
bool BaseScanner::FinishedFile() const {
|
29
29
|
if (!cur_buffer_handle) {
|
30
30
|
return true;
|
31
31
|
}
|
@@ -76,7 +76,7 @@ void BaseScanner::FinalizeChunkProcess() {
|
|
76
76
|
throw InternalException("FinalizeChunkProcess() from CSV Base Scanner is not implemented");
|
77
77
|
}
|
78
78
|
|
79
|
-
CSVStateMachine &BaseScanner::GetStateMachine() {
|
79
|
+
CSVStateMachine &BaseScanner::GetStateMachine() const {
|
80
80
|
return *state_machine;
|
81
81
|
}
|
82
82
|
|
@@ -12,10 +12,32 @@ void ColumnCountResult::AddValue(ColumnCountResult &result, idx_t buffer_pos) {
|
|
12
12
|
}
|
13
13
|
|
14
14
|
inline void ColumnCountResult::InternalAddRow() {
|
15
|
-
|
15
|
+
const idx_t column_count = current_column_count + 1;
|
16
|
+
column_counts[result_position].number_of_columns = column_count;
|
17
|
+
rows_per_column_count[column_count]++;
|
16
18
|
current_column_count = 0;
|
17
19
|
}
|
18
20
|
|
21
|
+
idx_t ColumnCountResult::GetMostFrequentColumnCount() const {
|
22
|
+
if (rows_per_column_count.empty()) {
|
23
|
+
return 1;
|
24
|
+
}
|
25
|
+
idx_t column_count = 0;
|
26
|
+
idx_t current_max = 0;
|
27
|
+
for (auto &rpc : rows_per_column_count) {
|
28
|
+
if (rpc.second > current_max) {
|
29
|
+
current_max = rpc.second;
|
30
|
+
column_count = rpc.first;
|
31
|
+
} else if (rpc.second == current_max) {
|
32
|
+
// We pick the largest to untie
|
33
|
+
if (rpc.first > column_count) {
|
34
|
+
column_count = rpc.first;
|
35
|
+
}
|
36
|
+
}
|
37
|
+
}
|
38
|
+
return column_count;
|
39
|
+
}
|
40
|
+
|
19
41
|
bool ColumnCountResult::AddRow(ColumnCountResult &result, idx_t buffer_pos) {
|
20
42
|
result.InternalAddRow();
|
21
43
|
if (!result.states.EmptyLastValue()) {
|
@@ -28,6 +28,11 @@ StringValueResult::StringValueResult(CSVStates &states, CSVStateMachine &state_m
|
|
28
28
|
current_errors(state_machine.options.IgnoreErrors()), sniffing(sniffing_p), path(std::move(path_p)) {
|
29
29
|
// Vector information
|
30
30
|
D_ASSERT(number_of_columns > 0);
|
31
|
+
if (!buffer_handle) {
|
32
|
+
// It Was Over Before It Even Began
|
33
|
+
D_ASSERT(iterator.done);
|
34
|
+
return;
|
35
|
+
}
|
31
36
|
buffer_handles[buffer_handle->buffer_idx] = buffer_handle;
|
32
37
|
// Buffer Information
|
33
38
|
buffer_ptr = buffer_handle->Ptr();
|
@@ -264,6 +269,10 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size
|
|
264
269
|
}
|
265
270
|
bool success = true;
|
266
271
|
switch (parse_types[chunk_col_id].type_id) {
|
272
|
+
case LogicalTypeId::BOOLEAN:
|
273
|
+
success =
|
274
|
+
TryCastStringBool(value_ptr, size, static_cast<bool *>(vector_ptr[chunk_col_id])[number_of_rows], false);
|
275
|
+
break;
|
267
276
|
case LogicalTypeId::TINYINT:
|
268
277
|
success = TrySimpleIntegerCast(value_ptr, size, static_cast<int8_t *>(vector_ptr[chunk_col_id])[number_of_rows],
|
269
278
|
false);
|
@@ -644,9 +653,15 @@ bool LineError::HandleErrors(StringValueResult &result) {
|
|
644
653
|
result.error_handler.Error(csv_error);
|
645
654
|
}
|
646
655
|
if (is_error_in_line) {
|
647
|
-
|
648
|
-
|
649
|
-
|
656
|
+
if (result.sniffing) {
|
657
|
+
// If we are sniffing we just remove the line
|
658
|
+
result.RemoveLastLine();
|
659
|
+
} else {
|
660
|
+
// Otherwise, we add it to the borked rows to remove it later and just cleanup the column variables.
|
661
|
+
result.borked_rows.insert(result.number_of_rows);
|
662
|
+
result.cur_col_id = 0;
|
663
|
+
result.chunk_col_id = 0;
|
664
|
+
}
|
650
665
|
Reset();
|
651
666
|
return true;
|
652
667
|
}
|
@@ -1437,6 +1452,7 @@ bool StringValueScanner::CanDirectlyCast(const LogicalType &type, bool icu_loade
|
|
1437
1452
|
case LogicalTypeId::TIME:
|
1438
1453
|
case LogicalTypeId::DECIMAL:
|
1439
1454
|
case LogicalType::VARCHAR:
|
1455
|
+
case LogicalType::BOOLEAN:
|
1440
1456
|
return true;
|
1441
1457
|
case LogicalType::TIMESTAMP_TZ:
|
1442
1458
|
// We only try to do direct cast of timestamp tz if the ICU extension is not loaded, otherwise, it needs to go
|
@@ -1493,7 +1509,7 @@ void StringValueScanner::SetStart() {
|
|
1493
1509
|
}
|
1494
1510
|
if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size ||
|
1495
1511
|
scan_finder->iterator.GetBufferIdx() > iterator.GetBufferIdx()) {
|
1496
|
-
// If things go terribly wrong, we never loop
|
1512
|
+
// If things go terribly wrong, we never loop indefinitely.
|
1497
1513
|
iterator.pos.buffer_idx = scan_finder->iterator.pos.buffer_idx;
|
1498
1514
|
iterator.pos.buffer_pos = scan_finder->iterator.pos.buffer_pos;
|
1499
1515
|
result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size};
|
@@ -1521,8 +1537,11 @@ void StringValueScanner::FinalizeChunkProcess() {
|
|
1521
1537
|
// If we are not done we have two options.
|
1522
1538
|
// 1) If a boundary is set.
|
1523
1539
|
if (iterator.IsBoundarySet()) {
|
1540
|
+
bool has_unterminated_quotes = false;
|
1524
1541
|
if (!result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
|
1525
1542
|
iterator.done = true;
|
1543
|
+
} else {
|
1544
|
+
has_unterminated_quotes = true;
|
1526
1545
|
}
|
1527
1546
|
// We read until the next line or until we have nothing else to read.
|
1528
1547
|
// Move to next buffer
|
@@ -1540,6 +1559,16 @@ void StringValueScanner::FinalizeChunkProcess() {
|
|
1540
1559
|
MoveToNextBuffer();
|
1541
1560
|
}
|
1542
1561
|
} else {
|
1562
|
+
if (result.current_errors.HasErrorType(UNTERMINATED_QUOTES)) {
|
1563
|
+
has_unterminated_quotes = true;
|
1564
|
+
}
|
1565
|
+
result.current_errors.HandleErrors(result);
|
1566
|
+
}
|
1567
|
+
if (states.IsQuotedCurrent() && !has_unterminated_quotes) {
|
1568
|
+
// If we finish the execution of a buffer, and we end in a quoted state, it means we have unterminated
|
1569
|
+
// quotes
|
1570
|
+
result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id,
|
1571
|
+
result.last_position);
|
1543
1572
|
result.current_errors.HandleErrors(result);
|
1544
1573
|
}
|
1545
1574
|
if (!iterator.done) {
|
@@ -134,7 +134,11 @@ SnifferResult CSVSniffer::MinimalSniff() {
|
|
134
134
|
for (idx_t col_idx = 0; col_idx < data_chunk.ColumnCount(); col_idx++) {
|
135
135
|
auto &cur_vector = data_chunk.data[col_idx];
|
136
136
|
auto vector_data = FlatVector::GetData<string_t>(cur_vector);
|
137
|
-
|
137
|
+
auto &validity = FlatVector::Validity(cur_vector);
|
138
|
+
HeaderValue val;
|
139
|
+
if (validity.RowIsValid(0)) {
|
140
|
+
val = HeaderValue(vector_data[0]);
|
141
|
+
}
|
138
142
|
potential_header.emplace_back(val);
|
139
143
|
}
|
140
144
|
}
|
@@ -221,13 +225,16 @@ SnifferResult CSVSniffer::SniffCSV(bool force_match) {
|
|
221
225
|
// If the header exists it should match
|
222
226
|
string header_error = "The Column names set by the user do not match the ones found by the sniffer. \n";
|
223
227
|
auto &set_names = *set_columns.names;
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
228
|
+
if (set_names.size() == names.size()) {
|
229
|
+
for (idx_t i = 0; i < set_columns.Size(); i++) {
|
230
|
+
if (set_names[i] != names[i]) {
|
231
|
+
header_error += "Column at position: " + to_string(i) + " Set name: " + set_names[i] +
|
232
|
+
" Sniffed Name: " + names[i] + "\n";
|
233
|
+
match = false;
|
234
|
+
}
|
229
235
|
}
|
230
236
|
}
|
237
|
+
|
231
238
|
if (!match) {
|
232
239
|
error += header_error;
|
233
240
|
}
|
@@ -235,15 +242,18 @@ SnifferResult CSVSniffer::SniffCSV(bool force_match) {
|
|
235
242
|
match = true;
|
236
243
|
string type_error = "The Column types set by the user do not match the ones found by the sniffer. \n";
|
237
244
|
auto &set_types = *set_columns.types;
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
+
if (detected_types.size() == set_columns.Size()) {
|
246
|
+
for (idx_t i = 0; i < set_columns.Size(); i++) {
|
247
|
+
if (set_types[i] != detected_types[i]) {
|
248
|
+
type_error += "Column at position: " + to_string(i) + " Set type: " + set_types[i].ToString() +
|
249
|
+
" Sniffed type: " + detected_types[i].ToString() + "\n";
|
250
|
+
detected_types[i] = set_types[i];
|
251
|
+
manually_set[i] = true;
|
252
|
+
match = false;
|
253
|
+
}
|
245
254
|
}
|
246
255
|
}
|
256
|
+
|
247
257
|
if (!match) {
|
248
258
|
error += type_error;
|
249
259
|
}
|