duckdb 1.3.1-dev6.0 → 1.3.2-dev0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/core_functions/aggregate/distributive/arg_min_max.cpp +27 -39
- package/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp +2 -3
- package/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_sort_tree.hpp +1 -1
- package/src/duckdb/extension/core_functions/lambda_functions.cpp +16 -14
- package/src/duckdb/extension/core_functions/scalar/list/list_filter.cpp +3 -2
- package/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp +46 -10
- package/src/duckdb/extension/core_functions/scalar/list/list_transform.cpp +3 -2
- package/src/duckdb/extension/core_functions/scalar/random/random.cpp +3 -1
- package/src/duckdb/extension/icu/icu-datefunc.cpp +5 -3
- package/src/duckdb/extension/icu/icu-strptime.cpp +6 -1
- package/src/duckdb/extension/icu/icu-timezone.cpp +4 -0
- package/src/duckdb/extension/icu/icu_extension.cpp +7 -2
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +1 -1
- package/src/duckdb/extension/icu/include/icu-helpers.hpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uloc.cpp +5 -5
- package/src/duckdb/extension/json/include/json_common.hpp +19 -0
- package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -4
- package/src/duckdb/extension/json/include/json_functions.hpp +4 -4
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +38 -17
- package/src/duckdb/extension/json/json_functions/json_table_in_out.cpp +11 -7
- package/src/duckdb/extension/json/json_functions.cpp +4 -4
- package/src/duckdb/extension/json/json_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/column_reader.cpp +7 -1
- package/src/duckdb/extension/parquet/include/parquet_bss_decoder.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +2 -1
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +3 -0
- package/src/duckdb/extension/parquet/include/writer/parquet_write_operators.hpp +3 -1
- package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +1 -1
- package/src/duckdb/extension/parquet/parquet_crypto.cpp +9 -5
- package/src/duckdb/extension/parquet/parquet_extension.cpp +26 -0
- package/src/duckdb/extension/parquet/parquet_float16.cpp +4 -2
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +3 -3
- package/src/duckdb/extension/parquet/parquet_multi_file_info.cpp +12 -0
- package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -4
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +13 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -1
- package/src/duckdb/extension/parquet/reader/decimal_column_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
- package/src/duckdb/extension/parquet/reader/struct_column_reader.cpp +13 -4
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +2 -0
- package/src/duckdb/src/catalog/catalog.cpp +10 -4
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +4 -10
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -2
- package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_search_path.cpp +7 -1
- package/src/duckdb/src/catalog/catalog_set.cpp +21 -1
- package/src/duckdb/src/common/adbc/adbc.cpp +1 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +17 -5
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +23 -15
- package/src/duckdb/src/common/box_renderer.cpp +1 -2
- package/src/duckdb/src/common/enum_util.cpp +4 -3
- package/src/duckdb/src/common/local_file_system.cpp +13 -12
- package/src/duckdb/src/common/multi_file/multi_file_column_mapper.cpp +35 -12
- package/src/duckdb/src/common/multi_file/multi_file_reader.cpp +13 -3
- package/src/duckdb/src/common/string_util.cpp +7 -5
- package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +4 -4
- package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +4 -4
- package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +4 -4
- package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +4 -4
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +1 -1
- package/src/duckdb/src/common/types/uuid.cpp +5 -1
- package/src/duckdb/src/common/types.cpp +28 -0
- package/src/duckdb/src/common/virtual_file_system.cpp +5 -0
- package/src/duckdb/src/execution/column_binding_resolver.cpp +49 -30
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +4 -0
- package/src/duckdb/src/execution/join_hashtable.cpp +10 -7
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +1 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +2 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +1 -4
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +53 -1
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +58 -59
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +10 -5
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +4 -0
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +18 -8
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +1 -1
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +1 -0
- package/src/duckdb/src/execution/physical_plan_generator.cpp +5 -5
- package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +2 -1
- package/src/duckdb/src/function/function.cpp +4 -0
- package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +6 -0
- package/src/duckdb/src/function/scalar/struct/remap_struct.cpp +10 -1
- package/src/duckdb/src/function/table/copy_csv.cpp +1 -0
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -9
- package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_column_mapper.hpp +3 -5
- package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_reader.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/shadow_forbidden_functions.hpp +40 -0
- package/src/duckdb/src/include/duckdb/common/string.hpp +25 -2
- package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +20 -24
- package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +20 -24
- package/src/duckdb/src/include/duckdb/common/types.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +34 -8
- package/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +3 -2
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +3 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +15 -3
- package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +7 -3
- package/src/duckdb/src/include/duckdb/function/function.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +20 -12
- package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +4 -3
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +3 -1
- package/src/duckdb/src/include/duckdb/logging/log_type.hpp +17 -0
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +22 -6
- package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -1
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +27 -13
- package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/settings.hpp +11 -0
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +7 -1
- package/src/duckdb/src/include/duckdb/original/std/locale.hpp +10 -0
- package/src/duckdb/src/include/duckdb/original/std/memory.hpp +12 -0
- package/src/duckdb/src/include/duckdb/original/std/sstream.hpp +11 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +5 -3
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +4 -2
- package/src/duckdb/src/logging/log_manager.cpp +1 -0
- package/src/duckdb/src/logging/log_types.cpp +40 -0
- package/src/duckdb/src/main/attached_database.cpp +4 -0
- package/src/duckdb/src/main/client_context.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +1 -0
- package/src/duckdb/src/main/database.cpp +1 -0
- package/src/duckdb/src/main/database_manager.cpp +19 -2
- package/src/duckdb/src/main/extension/extension_helper.cpp +4 -3
- package/src/duckdb/src/main/query_profiler.cpp +2 -2
- package/src/duckdb/src/main/query_result.cpp +1 -1
- package/src/duckdb/src/main/secret/secret_manager.cpp +2 -0
- package/src/duckdb/src/main/settings/autogenerated_settings.cpp +7 -0
- package/src/duckdb/src/main/settings/custom_settings.cpp +106 -34
- package/src/duckdb/src/optimizer/optimizer.cpp +1 -1
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +18 -8
- package/src/duckdb/src/parallel/executor.cpp +5 -0
- package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_interval.cpp +5 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +21 -24
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -8
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +3 -2
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +0 -4
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +3 -0
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +3 -0
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +3 -0
- package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +0 -1
- package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder.cpp +4 -2
- package/src/duckdb/src/planner/logical_operator.cpp +2 -1
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +4 -1
- package/src/duckdb/src/storage/buffer/block_handle.cpp +8 -0
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +44 -18
- package/src/duckdb/src/storage/caching_file_system.cpp +7 -7
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +4 -3
- package/src/duckdb/src/storage/storage_info.cpp +2 -0
- package/src/duckdb/src/storage/wal_replay.cpp +9 -4
- package/src/duckdb/third_party/fmt/include/fmt/format.h +8 -1
- package/src/duckdb/third_party/fsst/libfsst.cpp +4 -3
- package/src/duckdb/third_party/httplib/httplib.hpp +25 -22
- package/src/duckdb/third_party/hyperloglog/sds.cpp +7 -3
- package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +8 -1
- package/src/duckdb/third_party/re2/re2/filtered_re2.h +8 -2
- package/src/duckdb/third_party/re2/re2/pod_array.h +7 -1
- package/src/duckdb/third_party/re2/re2/re2.cc +6 -2
- package/src/duckdb/third_party/re2/re2/set.cc +1 -1
- package/src/duckdb/third_party/re2/re2/set.h +7 -1
- package/src/duckdb/ub_src_logging.cpp +4 -4
@@ -105,7 +105,9 @@ public:
|
|
105
105
|
DUCKDB_API void ScanWithPrefix(CatalogTransaction transaction, const std::function<void(CatalogEntry &)> &callback,
|
106
106
|
const string &prefix);
|
107
107
|
DUCKDB_API void Scan(CatalogTransaction transaction, const std::function<void(CatalogEntry &)> &callback);
|
108
|
+
DUCKDB_API void ScanWithReturn(CatalogTransaction transaction, const std::function<bool(CatalogEntry &)> &callback);
|
108
109
|
DUCKDB_API void Scan(ClientContext &context, const std::function<void(CatalogEntry &)> &callback);
|
110
|
+
DUCKDB_API void ScanWithReturn(ClientContext &context, const std::function<bool(CatalogEntry &)> &callback);
|
109
111
|
|
110
112
|
template <class T>
|
111
113
|
vector<reference<T>> GetEntries(CatalogTransaction transaction) {
|
@@ -16,9 +16,9 @@ class Allocator;
|
|
16
16
|
class BlockManager;
|
17
17
|
struct FileHandle;
|
18
18
|
|
19
|
-
enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2, TINY_BUFFER = 3 };
|
19
|
+
enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2, TINY_BUFFER = 3, EXTERNAL_FILE = 4 };
|
20
20
|
|
21
|
-
static constexpr idx_t FILE_BUFFER_TYPE_COUNT = 3;
|
21
|
+
static constexpr idx_t FILE_BUFFER_TYPE_COUNT = 4;
|
22
22
|
|
23
23
|
//! The FileBuffer represents a buffer that can be read or written to a Direct IO FileHandle.
|
24
24
|
class FileBuffer {
|
@@ -70,7 +70,7 @@ inline
|
|
70
70
|
shared_ptr<DATA_TYPE>
|
71
71
|
make_shared_ptr(ARGS&&... args) // NOLINT: mimic std style
|
72
72
|
{
|
73
|
-
return shared_ptr<DATA_TYPE>(std::make_shared<DATA_TYPE>(std::forward<ARGS>(args)...));
|
73
|
+
return shared_ptr<DATA_TYPE>(duckdb_base_std::make_shared<DATA_TYPE>(std::forward<ARGS>(args)...));
|
74
74
|
}
|
75
75
|
|
76
76
|
template<class DATA_TYPE, class... ARGS>
|
@@ -82,31 +82,31 @@ make_unsafe_uniq(ARGS&&... args) // NOLINT: mimic std style
|
|
82
82
|
}
|
83
83
|
|
84
84
|
template<class DATA_TYPE>
|
85
|
-
inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, true>
|
85
|
+
inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, true>
|
86
86
|
make_uniq_array(size_t n) // NOLINT: mimic std style
|
87
87
|
{
|
88
|
-
return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, true>(new DATA_TYPE[n]());
|
88
|
+
return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, true>(new DATA_TYPE[n]());
|
89
89
|
}
|
90
90
|
|
91
91
|
template<class DATA_TYPE>
|
92
|
-
inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, true>
|
92
|
+
inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, true>
|
93
93
|
make_uniq_array_uninitialized(size_t n) // NOLINT: mimic std style
|
94
94
|
{
|
95
|
-
return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, true>(new DATA_TYPE[n]);
|
95
|
+
return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, true>(new DATA_TYPE[n]);
|
96
96
|
}
|
97
97
|
|
98
98
|
template<class DATA_TYPE>
|
99
|
-
inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, false>
|
99
|
+
inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, false>
|
100
100
|
make_unsafe_uniq_array(size_t n) // NOLINT: mimic std style
|
101
101
|
{
|
102
|
-
return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, false>(new DATA_TYPE[n]());
|
102
|
+
return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, false>(new DATA_TYPE[n]());
|
103
103
|
}
|
104
104
|
|
105
105
|
template<class DATA_TYPE>
|
106
|
-
inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, false>
|
106
|
+
inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, false>
|
107
107
|
make_unsafe_uniq_array_uninitialized(size_t n) // NOLINT: mimic std style
|
108
108
|
{
|
109
|
-
return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, false>(new DATA_TYPE[n]);
|
109
|
+
return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, false>(new DATA_TYPE[n]);
|
110
110
|
}
|
111
111
|
|
112
112
|
template<class DATA_TYPE, class... ARGS>
|
@@ -15,9 +15,9 @@
|
|
15
15
|
#include "duckdb/planner/expression_iterator.hpp"
|
16
16
|
#include "duckdb/planner/table_filter.hpp"
|
17
17
|
#include "duckdb/common/open_file_info.hpp"
|
18
|
+
#include "duckdb/original/std/sstream.hpp"
|
18
19
|
|
19
20
|
#include <iostream>
|
20
|
-
#include <sstream>
|
21
21
|
|
22
22
|
namespace duckdb {
|
23
23
|
struct MultiFilePushdownInfo;
|
@@ -19,16 +19,15 @@ public:
|
|
19
19
|
MultiFileColumnMapper(ClientContext &context, MultiFileReader &multi_file_reader, MultiFileReaderData &reader_data,
|
20
20
|
const vector<MultiFileColumnDefinition> &global_columns,
|
21
21
|
const vector<ColumnIndex> &global_column_ids, optional_ptr<TableFilterSet> filters,
|
22
|
-
MultiFileList &multi_file_list, const MultiFileReaderBindData &bind_data,
|
23
|
-
const virtual_column_map_t &virtual_columns);
|
22
|
+
MultiFileList &multi_file_list, const virtual_column_map_t &virtual_columns);
|
24
23
|
|
25
24
|
public:
|
26
|
-
ReaderInitializeType CreateMapping();
|
25
|
+
ReaderInitializeType CreateMapping(MultiFileColumnMappingMode mapping_mode);
|
27
26
|
|
28
27
|
void ThrowColumnNotFoundError(const string &global_column_name) const;
|
29
28
|
|
30
29
|
private:
|
31
|
-
ResultColumnMapping CreateColumnMapping();
|
30
|
+
ResultColumnMapping CreateColumnMapping(MultiFileColumnMappingMode mapping_mode);
|
32
31
|
ResultColumnMapping CreateColumnMappingByMapper(const ColumnMapper &mapper);
|
33
32
|
|
34
33
|
unique_ptr<TableFilterSet> CreateFilters(map<idx_t, reference<TableFilter>> &filters, ResultColumnMapping &mapping);
|
@@ -45,7 +44,6 @@ private:
|
|
45
44
|
const vector<MultiFileColumnDefinition> &global_columns;
|
46
45
|
const vector<ColumnIndex> &global_column_ids;
|
47
46
|
optional_ptr<TableFilterSet> global_filters;
|
48
|
-
const MultiFileReaderBindData &bind_data;
|
49
47
|
const virtual_column_map_t &virtual_columns;
|
50
48
|
};
|
51
49
|
|
@@ -106,6 +106,13 @@ public:
|
|
106
106
|
optional_ptr<MultiFileReaderGlobalState> global_state);
|
107
107
|
|
108
108
|
//! Create all required mappings from the global types/names to the file-local types/names
|
109
|
+
DUCKDB_API virtual ReaderInitializeType
|
110
|
+
CreateMapping(ClientContext &context, MultiFileReaderData &reader_data,
|
111
|
+
const vector<MultiFileColumnDefinition> &global_columns, const vector<ColumnIndex> &global_column_ids,
|
112
|
+
optional_ptr<TableFilterSet> filters, MultiFileList &multi_file_list,
|
113
|
+
const MultiFileReaderBindData &bind_data, const virtual_column_map_t &virtual_columns,
|
114
|
+
MultiFileColumnMappingMode mapping_mode);
|
115
|
+
|
109
116
|
DUCKDB_API virtual ReaderInitializeType
|
110
117
|
CreateMapping(ClientContext &context, MultiFileReaderData &reader_data,
|
111
118
|
const vector<MultiFileColumnDefinition> &global_columns, const vector<ColumnIndex> &global_column_ids,
|
@@ -84,6 +84,9 @@ struct MultiFileBindData : public TableFunctionData {
|
|
84
84
|
void Initialize(ClientContext &, BaseUnionData &union_data) {
|
85
85
|
Initialize(std::move(union_data.reader));
|
86
86
|
}
|
87
|
+
bool SupportStatementCache() const override {
|
88
|
+
return false;
|
89
|
+
}
|
87
90
|
|
88
91
|
unique_ptr<FunctionData> Copy() const override;
|
89
92
|
};
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "duckdb/original/std/memory.hpp"
|
4
|
+
#include "duckdb/original/std/locale.hpp"
|
5
|
+
#include "duckdb/original/std/sstream.hpp"
|
6
|
+
#include "duckdb/common/unique_ptr.hpp"
|
7
|
+
#include "duckdb/common/shared_ptr.hpp"
|
8
|
+
|
9
|
+
#ifndef DUCKDB_CLANG_TIDY
|
10
|
+
namespace std {
|
11
|
+
template <class C>
|
12
|
+
bool isspace(C c) {
|
13
|
+
static_assert(sizeof(C) == 0, "Use StringUtil::CharacterIsSpace instead of isspace!");
|
14
|
+
return false;
|
15
|
+
}
|
16
|
+
#ifndef DUCKDB_ENABLE_DEPRECATED_API
|
17
|
+
template <class T, class... ARGS>
|
18
|
+
static std::unique_ptr<T> make_unique(ARGS &&...__args) { // NOLINT: mimic std style
|
19
|
+
static_assert(sizeof(T) == 0, "Use make_uniq instead of make_unique!");
|
20
|
+
return nullptr;
|
21
|
+
}
|
22
|
+
|
23
|
+
template <class T, class... ARGS>
|
24
|
+
static std::shared_ptr<T> make_shared(ARGS &&...__args) { // NOLINT: mimic std style
|
25
|
+
static_assert(sizeof(T) == 0, "Use make_shared_ptr instead of make_shared!");
|
26
|
+
return nullptr;
|
27
|
+
}
|
28
|
+
#endif // DUCKDB_ENABLE_DEPRECATED_API
|
29
|
+
|
30
|
+
template <class charT, class traits = char_traits<charT>, class Allocator = allocator<charT>>
|
31
|
+
class basic_stringstream_mock;
|
32
|
+
|
33
|
+
typedef basic_stringstream_mock<char> stringstream;
|
34
|
+
|
35
|
+
} // namespace std
|
36
|
+
|
37
|
+
using std::isspace;
|
38
|
+
using std::make_shared;
|
39
|
+
using std::make_unique;
|
40
|
+
#endif
|
@@ -8,10 +8,33 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
-
#include <sstream>
|
11
|
+
#include "duckdb/original/std/sstream.hpp"
|
12
12
|
#include <string>
|
13
|
+
#include <locale>
|
13
14
|
|
14
15
|
namespace duckdb {
|
15
16
|
using std::string;
|
16
|
-
|
17
|
+
} // namespace duckdb
|
18
|
+
|
19
|
+
namespace duckdb {
|
20
|
+
|
21
|
+
template <class charT, class traits = std::char_traits<charT>, class Allocator = std::allocator<charT>>
|
22
|
+
class basic_stringstream : public duckdb_base_std::basic_stringstream<charT, traits, Allocator> {
|
23
|
+
public:
|
24
|
+
using original = duckdb_base_std::basic_stringstream<charT, traits, Allocator>;
|
25
|
+
|
26
|
+
explicit basic_stringstream(std::ios_base::openmode which = std::ios_base::out | std::ios_base::in)
|
27
|
+
: original(which) {
|
28
|
+
this->imbue(std::locale::classic());
|
29
|
+
}
|
30
|
+
explicit basic_stringstream(const std::basic_string<charT, traits, Allocator> &s,
|
31
|
+
std::ios_base::openmode which = std::ios_base::out | std::ios_base::in)
|
32
|
+
: original(s, which) {
|
33
|
+
this->imbue(std::locale::classic());
|
34
|
+
}
|
35
|
+
basic_stringstream(const basic_stringstream &) = delete;
|
36
|
+
basic_stringstream(basic_stringstream &&rhs) noexcept;
|
37
|
+
};
|
38
|
+
|
39
|
+
typedef basic_stringstream<char> stringstream;
|
17
40
|
} // namespace duckdb
|
@@ -129,46 +129,42 @@ public:
|
|
129
129
|
static int Sign(hugeint_t n);
|
130
130
|
static hugeint_t Abs(hugeint_t n);
|
131
131
|
// comparison operators
|
132
|
-
// note that everywhere here we intentionally use bitwise ops
|
133
|
-
// this is because they seem to be consistently much faster (benchmarked on a Macbook Pro)
|
134
132
|
static bool Equals(hugeint_t lhs, hugeint_t rhs) {
|
135
|
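-
int lower_equals = lhs.lower == rhs.lower;
|
136
|
-
int upper_equals = lhs.upper == rhs.upper;
|
137
|
-
return lower_equals & upper_equals;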
|
133
|
+
bool lower_equals = lhs.lower == rhs.lower;
|
134
|
+
bool upper_equals = lhs.upper == rhs.upper;
|
135
|
+
return lower_equals && upper_equals;
|
138
136
|
}
|
139
137
|
|
140
138
|
static bool NotEquals(hugeint_t lhs, hugeint_t rhs) {
|
141
|
-
int lower_not_equals = lhs.lower != rhs.lower;
|
142
|
-
int upper_not_equals = lhs.upper != rhs.upper;
|
143
|
-
return lower_not_equals | upper_not_equals;
|
139
|
+
return !Equals(lhs, rhs);
|
144
140
|
}
|
145
141
|
|
146
142
|
static bool GreaterThan(hugeint_t lhs, hugeint_t rhs) {
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
return upper_bigger
|
143
|
+
bool upper_bigger = lhs.upper > rhs.upper;
|
144
|
+
bool upper_equal = lhs.upper == rhs.upper;
|
145
|
+
bool lower_bigger = lhs.lower > rhs.lower;
|
146
|
+
return upper_bigger || (upper_equal && lower_bigger);
|
151
147
|
}
|
152
148
|
|
153
149
|
static bool GreaterThanEquals(hugeint_t lhs, hugeint_t rhs) {
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
return upper_bigger
|
150
|
+
bool upper_bigger = lhs.upper > rhs.upper;
|
151
|
+
bool upper_equal = lhs.upper == rhs.upper;
|
152
|
+
bool lower_bigger_equals = lhs.lower >= rhs.lower;
|
153
|
+
return upper_bigger || (upper_equal && lower_bigger_equals);
|
158
154
|
}
|
159
155
|
|
160
156
|
static bool LessThan(hugeint_t lhs, hugeint_t rhs) {
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
return upper_smaller
|
157
|
+
bool upper_smaller = lhs.upper < rhs.upper;
|
158
|
+
bool upper_equal = lhs.upper == rhs.upper;
|
159
|
+
bool lower_smaller = lhs.lower < rhs.lower;
|
160
|
+
return upper_smaller || (upper_equal && lower_smaller);
|
165
161
|
}
|
166
162
|
|
167
163
|
static bool LessThanEquals(hugeint_t lhs, hugeint_t rhs) {
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
return upper_smaller
|
164
|
+
bool upper_smaller = lhs.upper < rhs.upper;
|
165
|
+
bool upper_equal = lhs.upper == rhs.upper;
|
166
|
+
bool lower_smaller_equals = lhs.lower <= rhs.lower;
|
167
|
+
return upper_smaller || (upper_equal && lower_smaller_equals);
|
172
168
|
}
|
173
169
|
|
174
170
|
static constexpr uint8_t CACHED_POWERS_OF_TEN = 39;
|
@@ -118,46 +118,42 @@ public:
|
|
118
118
|
static hugeint_t Abs(hugeint_t n);
|
119
119
|
|
120
120
|
// comparison operators
|
121
|
-
// note that everywhere here we intentionally use bitwise ops
|
122
|
-
// this is because they seem to be consistently much faster (benchmarked on a Macbook Pro)
|
123
121
|
static bool Equals(uhugeint_t lhs, uhugeint_t rhs) {
|
124
|
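-
int lower_equals = lhs.lower == rhs.lower;
|
125
|
-
int upper_equals = lhs.upper == rhs.upper;
|
126
|
-
return lower_equals & upper_equals;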
|
122
|
+
bool lower_equals = lhs.lower == rhs.lower;
|
123
|
+
bool upper_equals = lhs.upper == rhs.upper;
|
124
|
+
return lower_equals && upper_equals;
|
127
125
|
}
|
128
126
|
|
129
127
|
static bool NotEquals(uhugeint_t lhs, uhugeint_t rhs) {
|
130
|
-
int lower_not_equals = lhs.lower != rhs.lower;
|
131
|
-
int upper_not_equals = lhs.upper != rhs.upper;
|
132
|
-
return lower_not_equals | upper_not_equals;
|
128
|
+
return !Equals(lhs, rhs);
|
133
129
|
}
|
134
130
|
|
135
131
|
static bool GreaterThan(uhugeint_t lhs, uhugeint_t rhs) {
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
return upper_bigger
|
132
|
+
bool upper_bigger = lhs.upper > rhs.upper;
|
133
|
+
bool upper_equal = lhs.upper == rhs.upper;
|
134
|
+
bool lower_bigger = lhs.lower > rhs.lower;
|
135
|
+
return upper_bigger || (upper_equal && lower_bigger);
|
140
136
|
}
|
141
137
|
|
142
138
|
static bool GreaterThanEquals(uhugeint_t lhs, uhugeint_t rhs) {
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
return upper_bigger
|
139
|
+
bool upper_bigger = lhs.upper > rhs.upper;
|
140
|
+
bool upper_equal = lhs.upper == rhs.upper;
|
141
|
+
bool lower_bigger_equals = lhs.lower >= rhs.lower;
|
142
|
+
return upper_bigger || (upper_equal && lower_bigger_equals);
|
147
143
|
}
|
148
144
|
|
149
145
|
static bool LessThan(uhugeint_t lhs, uhugeint_t rhs) {
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
return upper_smaller
|
146
|
+
bool upper_smaller = lhs.upper < rhs.upper;
|
147
|
+
bool upper_equal = lhs.upper == rhs.upper;
|
148
|
+
bool lower_smaller = lhs.lower < rhs.lower;
|
149
|
+
return upper_smaller || (upper_equal && lower_smaller);
|
154
150
|
}
|
155
151
|
|
156
152
|
static bool LessThanEquals(uhugeint_t lhs, uhugeint_t rhs) {
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
return upper_smaller
|
153
|
+
bool upper_smaller = lhs.upper < rhs.upper;
|
154
|
+
bool upper_equal = lhs.upper == rhs.upper;
|
155
|
+
bool lower_smaller_equals = lhs.lower <= rhs.lower;
|
156
|
+
return upper_smaller || (upper_equal && lower_smaller_equals);
|
161
157
|
}
|
162
158
|
|
163
159
|
static constexpr uint8_t CACHED_POWERS_OF_TEN = 39;
|
@@ -3,16 +3,16 @@
|
|
3
3
|
#include "duckdb/common/exception.hpp"
|
4
4
|
#include "duckdb/common/likely.hpp"
|
5
5
|
#include "duckdb/common/memory_safety.hpp"
|
6
|
+
#include "duckdb/original/std/memory.hpp"
|
6
7
|
|
7
|
-
#include <memory>
|
8
8
|
#include <type_traits>
|
9
9
|
|
10
10
|
namespace duckdb {
|
11
11
|
|
12
12
|
template <class DATA_TYPE, class DELETER = std::default_delete<DATA_TYPE>, bool SAFE = true>
|
13
|
-
class unique_ptr : public std::unique_ptr<DATA_TYPE, DELETER> { // NOLINT: naming
|
13
|
+
class unique_ptr : public duckdb_base_std::unique_ptr<DATA_TYPE, DELETER> { // NOLINT: naming
|
14
14
|
public:
|
15
|
-
using original = std::unique_ptr<DATA_TYPE, DELETER>;
|
15
|
+
using original = duckdb_base_std::unique_ptr<DATA_TYPE, DELETER>;
|
16
16
|
using original::original; // NOLINT
|
17
17
|
using pointer = typename original::pointer;
|
18
18
|
|
@@ -54,11 +54,37 @@ public:
|
|
54
54
|
}
|
55
55
|
};
|
56
56
|
|
57
|
-
|
57
|
+
template <class DATA_TYPE, class DELETER>
|
58
|
+
class unique_ptr<DATA_TYPE[], DELETER, true> : public duckdb_base_std::unique_ptr<DATA_TYPE[], DELETER> {
|
59
|
+
public:
|
60
|
+
using original = duckdb_base_std::unique_ptr<DATA_TYPE[], DELETER>;
|
61
|
+
using original::original;
|
62
|
+
|
63
|
+
private:
|
64
|
+
static inline void AssertNotNull(const bool null) {
|
65
|
+
#if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
|
66
|
+
return;
|
67
|
+
#else
|
68
|
+
if (DUCKDB_UNLIKELY(null)) {
|
69
|
+
throw duckdb::InternalException("Attempted to dereference unique_ptr that is NULL!");
|
70
|
+
}
|
71
|
+
#endif
|
72
|
+
}
|
73
|
+
|
74
|
+
public:
|
75
|
+
typename std::add_lvalue_reference<DATA_TYPE>::type operator[](size_t __i) const { // NOLINT: hiding on purpose
|
76
|
+
const auto ptr = original::get();
|
77
|
+
if (MemorySafety<true>::ENABLED) {
|
78
|
+
AssertNotNull(!ptr);
|
79
|
+
}
|
80
|
+
return ptr[__i];
|
81
|
+
}
|
82
|
+
};
|
83
|
+
|
58
84
|
template <class DATA_TYPE, class DELETER, bool SAFE>
|
59
|
-
class unique_ptr<DATA_TYPE[], DELETER, SAFE> : public
|
85
|
+
class unique_ptr<DATA_TYPE[], DELETER, SAFE> : public duckdb_base_std::unique_ptr<DATA_TYPE[], DELETER> {
|
60
86
|
public:
|
61
|
-
using original =
|
87
|
+
using original = duckdb_base_std::unique_ptr<DATA_TYPE[], DELETER>;
|
62
88
|
using original::original;
|
63
89
|
|
64
90
|
private:
|
@@ -83,10 +109,10 @@ public:
|
|
83
109
|
};
|
84
110
|
|
85
111
|
template <typename T>
|
86
|
-
using unique_array = unique_ptr<T[], std::default_delete<T>, true>;
|
112
|
+
using unique_array = unique_ptr<T[], std::default_delete<T[]>, true>;
|
87
113
|
|
88
114
|
template <typename T>
|
89
|
-
using unsafe_unique_array = unique_ptr<T[], std::default_delete<T>, false>;
|
115
|
+
using unsafe_unique_array = unique_ptr<T[], std::default_delete<T[]>, false>;
|
90
116
|
|
91
117
|
template <typename T>
|
92
118
|
using unsafe_unique_ptr = unique_ptr<T, std::default_delete<T>, false>;
|
@@ -168,8 +168,8 @@ public:
|
|
168
168
|
TupleDataChunkState chunk_state;
|
169
169
|
};
|
170
170
|
|
171
|
-
JoinHashTable(ClientContext &context, const vector<JoinCondition> &conditions, vector<LogicalType> build_types,
|
172
|
-
JoinType type, const vector<idx_t> &output_columns);
|
171
|
+
JoinHashTable(ClientContext &context, const PhysicalOperator &op, const vector<JoinCondition> &conditions,
|
172
|
+
vector<LogicalType> build_types, JoinType type, const vector<idx_t> &output_columns);
|
173
173
|
~JoinHashTable();
|
174
174
|
|
175
175
|
//! Add the given data to the HT
|
@@ -214,6 +214,7 @@ public:
|
|
214
214
|
}
|
215
215
|
|
216
216
|
ClientContext &context;
|
217
|
+
const PhysicalOperator &op;
|
217
218
|
//! BufferManager
|
218
219
|
BufferManager &buffer_manager;
|
219
220
|
//! The join conditions
|
package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp
CHANGED
@@ -27,6 +27,8 @@ struct ColumnCount {
|
|
27
27
|
bool is_comment = false;
|
28
28
|
//! If this row is potentially a mid-line comment
|
29
29
|
bool is_mid_comment = false;
|
30
|
+
//! Number of empty lines before this value
|
31
|
+
idx_t empty_lines = 0;
|
30
32
|
};
|
31
33
|
|
32
34
|
class ColumnCountResult : public ScannerResult {
|
@@ -48,6 +50,7 @@ public:
|
|
48
50
|
map<idx_t, idx_t> rows_per_column_count;
|
49
51
|
CSVErrorHandler &error_handler;
|
50
52
|
map<idx_t, shared_ptr<CSVBufferHandle>> buffer_handles;
|
53
|
+
idx_t empty_lines = 0;
|
51
54
|
//! Adds a Value to the result
|
52
55
|
static inline void AddValue(ColumnCountResult &result, idx_t buffer_pos);
|
53
56
|
//! Adds a Row to the result
|
@@ -40,7 +40,7 @@ struct CSVEncoderBuffer {
|
|
40
40
|
|
41
41
|
private:
|
42
42
|
//! The encoded buffer, we only have one per file, so we cache it and make sure to pass over unused bytes.
|
43
|
-
|
43
|
+
duckdb::unique_ptr<char[]> encoded_buffer;
|
44
44
|
//! The encoded buffer size is defined as buffer_size/GetRatio()
|
45
45
|
idx_t encoded_buffer_size;
|
46
46
|
};
|
package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp
CHANGED
@@ -30,6 +30,18 @@ struct QuoteEscapeCombination {
|
|
30
30
|
char escape;
|
31
31
|
};
|
32
32
|
|
33
|
+
//! Current stats of candidate analysis
|
34
|
+
struct CandidateStats {
|
35
|
+
//! Number of rows read
|
36
|
+
idx_t rows_read = 0;
|
37
|
+
//! Best Number of consistent rows (i.e., presenting all columns)
|
38
|
+
idx_t best_consistent_rows = 0;
|
39
|
+
//! If padding was necessary (i.e., rows are missing some columns, how many)
|
40
|
+
idx_t prev_padding_count = 0;
|
41
|
+
//! Min number of ignored rows
|
42
|
+
idx_t min_ignored_rows = 0;
|
43
|
+
};
|
44
|
+
|
33
45
|
//! All the options that will be used to sniff the dialect of the CSV file
|
34
46
|
struct DialectCandidates {
|
35
47
|
//! The constructor populates all of our the options that will be used in our sniffer search space
|
@@ -131,9 +143,9 @@ private:
|
|
131
143
|
void GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountScanner>> &column_count_scanners,
|
132
144
|
const DialectCandidates &dialect_candidates);
|
133
145
|
|
134
|
-
//! 2. Analyzes if dialect candidate is a good candidate to be considered, if so, it adds it to the candidates
|
135
|
-
void AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner>,
|
136
|
-
|
146
|
+
//! 2. Analyzes if a dialect candidate is a good candidate to be considered, if so, it adds it to the candidates
|
147
|
+
void AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner>, CandidateStats &stats,
|
148
|
+
vector<unique_ptr<ColumnCountScanner>> &successful_candidates);
|
137
149
|
//! 3. Refine Candidates over remaining chunks
|
138
150
|
void RefineCandidates();
|
139
151
|
|
@@ -197,9 +197,9 @@ struct VectorCastHelpers {
|
|
197
197
|
|
198
198
|
if (STRUCT_KEY) {
|
199
199
|
needs_quotes = true;
|
200
|
-
} else if (
|
200
|
+
} else if (StringUtil::CharacterIsSpace(string_data[0])) {
|
201
201
|
needs_quotes = true;
|
202
|
-
} else if (base_length >= 2 &&
|
202
|
+
} else if (base_length >= 2 && StringUtil::CharacterIsSpace(string_data[base_length - 1])) {
|
203
203
|
needs_quotes = true;
|
204
204
|
} else if (StringUtil::CIEquals(string_data, base_length, "null", 4)) {
|
205
205
|
needs_quotes = true;
|
@@ -20,6 +20,7 @@ struct CopyFunctionFileStatistics;
|
|
20
20
|
class Binder;
|
21
21
|
class ColumnDataCollection;
|
22
22
|
class ExecutionContext;
|
23
|
+
class PhysicalOperatorLogger;
|
23
24
|
|
24
25
|
struct LocalFunctionData {
|
25
26
|
virtual ~LocalFunctionData() = default;
|
@@ -123,6 +124,8 @@ typedef void (*copy_to_get_written_statistics_t)(ClientContext &context, Functio
|
|
123
124
|
|
124
125
|
typedef vector<unique_ptr<Expression>> (*copy_to_select_t)(CopyToSelectInput &input);
|
125
126
|
|
127
|
+
typedef void (*copy_to_initialize_operator_t)(GlobalFunctionData &gstate, const PhysicalOperator &op);
|
128
|
+
|
126
129
|
enum class CopyFunctionReturnType : uint8_t {
|
127
130
|
CHANGED_ROWS = 0,
|
128
131
|
CHANGED_ROWS_AND_FILE_LIST = 1,
|
@@ -145,9 +148,9 @@ public:
|
|
145
148
|
: Function(name), plan(nullptr), copy_to_select(nullptr), copy_to_bind(nullptr),
|
146
149
|
copy_to_initialize_local(nullptr), copy_to_initialize_global(nullptr),
|
147
150
|
copy_to_get_written_statistics(nullptr), copy_to_sink(nullptr), copy_to_combine(nullptr),
|
148
|
-
copy_to_finalize(nullptr), execution_mode(nullptr),
|
149
|
-
|
150
|
-
deserialize(nullptr), copy_from_bind(nullptr) {
|
151
|
+
copy_to_finalize(nullptr), execution_mode(nullptr), initialize_operator(nullptr), prepare_batch(nullptr),
|
152
|
+
flush_batch(nullptr), desired_batch_size(nullptr), rotate_files(nullptr), rotate_next_file(nullptr),
|
153
|
+
serialize(nullptr), deserialize(nullptr), copy_from_bind(nullptr) {
|
151
154
|
}
|
152
155
|
|
153
156
|
//! Plan rewrite copy function
|
@@ -162,6 +165,7 @@ public:
|
|
162
165
|
copy_to_combine_t copy_to_combine;
|
163
166
|
copy_to_finalize_t copy_to_finalize;
|
164
167
|
copy_to_execution_mode_t execution_mode;
|
168
|
+
copy_to_initialize_operator_t initialize_operator;
|
165
169
|
|
166
170
|
copy_prepare_batch_t prepare_batch;
|
167
171
|
copy_flush_batch_t flush_batch;
|
@@ -61,6 +61,7 @@ struct FunctionData {
|
|
61
61
|
DUCKDB_API virtual unique_ptr<FunctionData> Copy() const = 0;
|
62
62
|
DUCKDB_API virtual bool Equals(const FunctionData &other) const = 0;
|
63
63
|
DUCKDB_API static bool Equals(const FunctionData *left, const FunctionData *right);
|
64
|
+
DUCKDB_API virtual bool SupportStatementCache() const;
|
64
65
|
|
65
66
|
template <class TARGET>
|
66
67
|
TARGET &Cast() {
|
@@ -73,9 +73,10 @@ public:
|
|
73
73
|
const vector<unique_ptr<Expression>> &groups);
|
74
74
|
DUCKDB_API static void BindSortedAggregate(ClientContext &context, BoundWindowExpression &expr);
|
75
75
|
|
76
|
-
private:
|
77
76
|
//! Cast a set of expressions to the arguments of this function
|
78
77
|
void CastToFunctionArguments(SimpleFunction &function, vector<unique_ptr<Expression>> &children);
|
78
|
+
|
79
|
+
private:
|
79
80
|
optional_idx BindVarArgsFunctionCost(const SimpleFunction &func, const vector<LogicalType> &arguments);
|
80
81
|
optional_idx BindFunctionCost(const SimpleFunction &func, const vector<LogicalType> &arguments);
|
81
82
|
|