duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +2 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
- package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
- package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
- package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
- package/src/duckdb/extension/json/json_scan.cpp +1 -1
- package/src/duckdb/extension/json/json_serializer.cpp +26 -69
- package/src/duckdb/src/common/enum_util.cpp +119 -7
- package/src/duckdb/src/common/extra_type_info.cpp +7 -3
- package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
- package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
- package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
- package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
- package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
- package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
- package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
- package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
- package/src/duckdb/src/common/types/interval.cpp +3 -0
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
- package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
- package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
- package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
- package/src/duckdb/src/common/types/value.cpp +63 -42
- package/src/duckdb/src/common/types/vector.cpp +33 -67
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
- package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
- package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
- package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
- package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
- package/src/duckdb/src/execution/window_executor.cpp +6 -5
- package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
- package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
- package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
- package/src/duckdb/src/function/table/read_csv.cpp +150 -136
- package/src/duckdb/src/function/table/table_scan.cpp +0 -2
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
- package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
- package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
- package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
- package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
- package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
- package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
- package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
- package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
- package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
- package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb.h +12 -0
- package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
- package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
- package/src/duckdb/src/main/client_verify.cpp +1 -0
- package/src/duckdb/src/main/config.cpp +2 -2
- package/src/duckdb/src/main/connection.cpp +3 -3
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
- package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
- package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
- package/src/duckdb/src/planner/logical_operator.cpp +1 -1
- package/src/duckdb/src/planner/planner.cpp +1 -1
- package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
- package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
- package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
- package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
- package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
- package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
- package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
- package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
- package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
- package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
- package/src/duckdb/src/storage/storage_info.cpp +1 -1
- package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
- package/src/duckdb/src/storage/table/row_group.cpp +68 -1
- package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
- package/src/duckdb/src/storage/wal_replay.cpp +2 -2
- package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
- package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
- package/src/duckdb/ub_src_execution.cpp +0 -2
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
- package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -8,19 +8,19 @@
|
|
8
8
|
|
9
9
|
#pragma once
|
10
10
|
|
11
|
-
#include "duckdb/execution/operator/persistent/csv_reader_options.hpp"
|
11
|
+
#include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
|
12
12
|
#include "duckdb/main/relation/table_function_relation.hpp"
|
13
13
|
|
14
14
|
namespace duckdb {
|
15
15
|
|
16
|
-
struct
|
16
|
+
struct CSVReaderOptions;
|
17
17
|
|
18
18
|
class ReadCSVRelation : public TableFunctionRelation {
|
19
19
|
public:
|
20
|
-
ReadCSVRelation(const
|
21
|
-
|
22
|
-
ReadCSVRelation(const
|
23
|
-
|
20
|
+
ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, vector<ColumnDefinition> columns,
|
21
|
+
string alias = string());
|
22
|
+
ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, CSVReaderOptions options,
|
23
|
+
string alias = string());
|
24
24
|
|
25
25
|
string alias;
|
26
26
|
bool auto_detect;
|
@@ -18,7 +18,7 @@ class Task;
|
|
18
18
|
|
19
19
|
class Event : public std::enable_shared_from_this<Event> {
|
20
20
|
public:
|
21
|
-
Event(Executor &executor);
|
21
|
+
explicit Event(Executor &executor);
|
22
22
|
virtual ~Event() = default;
|
23
23
|
|
24
24
|
public:
|
@@ -52,6 +52,17 @@ public:
|
|
52
52
|
virtual void PrintPipeline() {
|
53
53
|
}
|
54
54
|
|
55
|
+
template <class TARGET>
|
56
|
+
TARGET &Cast() {
|
57
|
+
D_ASSERT(dynamic_cast<TARGET *>(this));
|
58
|
+
return reinterpret_cast<TARGET &>(*this);
|
59
|
+
}
|
60
|
+
template <class TARGET>
|
61
|
+
const TARGET &Cast() const {
|
62
|
+
D_ASSERT(dynamic_cast<const TARGET *>(this));
|
63
|
+
return reinterpret_cast<const TARGET &>(*this);
|
64
|
+
}
|
65
|
+
|
55
66
|
protected:
|
56
67
|
Executor &executor;
|
57
68
|
//! The current threads working on the event
|
@@ -14,6 +14,9 @@
|
|
14
14
|
|
15
15
|
namespace duckdb {
|
16
16
|
|
17
|
+
class FormatSerializer;
|
18
|
+
class FormatDeserializer;
|
19
|
+
|
17
20
|
class Block : public FileBuffer {
|
18
21
|
public:
|
19
22
|
Block(Allocator &allocator, block_id_t id);
|
@@ -51,6 +54,9 @@ struct MetaBlockPointer {
|
|
51
54
|
}
|
52
55
|
block_id_t GetBlockId();
|
53
56
|
uint32_t GetBlockIndex();
|
57
|
+
|
58
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
59
|
+
static MetaBlockPointer FormatDeserialize(FormatDeserializer &source);
|
54
60
|
};
|
55
61
|
|
56
62
|
} // namespace duckdb
|
@@ -97,6 +97,9 @@ public:
|
|
97
97
|
inline const idx_t &GetMemoryUsage() const {
|
98
98
|
return memory_usage;
|
99
99
|
}
|
100
|
+
bool IsUnloaded() {
|
101
|
+
return state == BlockState::BLOCK_UNLOADED;
|
102
|
+
}
|
100
103
|
|
101
104
|
private:
|
102
105
|
static BufferHandle Load(shared_ptr<BlockHandle> &handle, unique_ptr<FileBuffer> buffer = nullptr);
|
@@ -19,6 +19,9 @@
|
|
19
19
|
namespace duckdb {
|
20
20
|
struct SelectionVector;
|
21
21
|
|
22
|
+
class FormatSerializer;
|
23
|
+
class FormatDeserializer;
|
24
|
+
|
22
25
|
class Serializer;
|
23
26
|
class Deserializer;
|
24
27
|
class FieldWriter;
|
@@ -94,17 +97,18 @@ public:
|
|
94
97
|
|
95
98
|
void Serialize(Serializer &serializer) const;
|
96
99
|
void Serialize(FieldWriter &writer) const;
|
97
|
-
|
98
|
-
idx_t GetDistinctCount();
|
99
|
-
|
100
100
|
static BaseStatistics Deserialize(Deserializer &source, LogicalType type);
|
101
101
|
|
102
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
103
|
+
static BaseStatistics FormatDeserialize(FormatDeserializer &deserializer);
|
104
|
+
|
102
105
|
//! Verify that a vector does not violate the statistics
|
103
106
|
void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const;
|
104
107
|
void Verify(Vector &vector, idx_t count) const;
|
105
108
|
|
106
109
|
string ToString() const;
|
107
110
|
|
111
|
+
idx_t GetDistinctCount();
|
108
112
|
static BaseStatistics FromConstant(const Value &input);
|
109
113
|
|
110
114
|
private:
|
@@ -12,6 +12,7 @@
|
|
12
12
|
#include "duckdb/storage/statistics/distinct_statistics.hpp"
|
13
13
|
|
14
14
|
namespace duckdb {
|
15
|
+
class FormatSerializer;
|
15
16
|
|
16
17
|
class ColumnStatistics {
|
17
18
|
public:
|
@@ -35,6 +36,9 @@ public:
|
|
35
36
|
void Serialize(Serializer &serializer) const;
|
36
37
|
static shared_ptr<ColumnStatistics> Deserialize(Deserializer &source, const LogicalType &type);
|
37
38
|
|
39
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
40
|
+
static shared_ptr<ColumnStatistics> FormatDeserialize(FormatDeserializer &source);
|
41
|
+
|
38
42
|
private:
|
39
43
|
BaseStatistics stats;
|
40
44
|
//! The approximate count distinct stats of the column
|
@@ -16,6 +16,8 @@ namespace duckdb {
|
|
16
16
|
class Serializer;
|
17
17
|
class Deserializer;
|
18
18
|
class Vector;
|
19
|
+
class FormatSerializer;
|
20
|
+
class FormatDeserializer;
|
19
21
|
|
20
22
|
class DistinctStatistics {
|
21
23
|
public:
|
@@ -48,6 +50,9 @@ public:
|
|
48
50
|
|
49
51
|
static bool TypeIsSupported(const LogicalType &type);
|
50
52
|
|
53
|
+
void FormatSerialize(FormatSerializer &serializer) const;
|
54
|
+
static unique_ptr<DistinctStatistics> FormatDeserialize(FormatDeserializer &deserializer);
|
55
|
+
|
51
56
|
private:
|
52
57
|
//! For distinct statistics we sample the input to speed up insertions
|
53
58
|
static constexpr const double SAMPLE_RATE = 0.1;
|
@@ -31,6 +31,9 @@ struct ListStats {
|
|
31
31
|
DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
|
32
32
|
DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
|
33
33
|
|
34
|
+
DUCKDB_API static void FormatSerialize(const BaseStatistics &stats, FormatSerializer &serializer);
|
35
|
+
DUCKDB_API static BaseStatistics FormatDeserialize(FormatDeserializer &deserializer, LogicalType type);
|
36
|
+
|
34
37
|
DUCKDB_API static string ToString(const BaseStatistics &stats);
|
35
38
|
|
36
39
|
DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other);
|
@@ -64,6 +64,9 @@ struct NumericStats {
|
|
64
64
|
DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
|
65
65
|
DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
|
66
66
|
|
67
|
+
DUCKDB_API static void FormatSerialize(const BaseStatistics &stats, FormatSerializer &serializer);
|
68
|
+
DUCKDB_API static BaseStatistics FormatDeserialize(FormatDeserializer &deserializer, LogicalType type);
|
69
|
+
|
67
70
|
DUCKDB_API static string ToString(const BaseStatistics &stats);
|
68
71
|
|
69
72
|
template <class T>
|
@@ -61,6 +61,9 @@ struct StringStats {
|
|
61
61
|
DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
|
62
62
|
DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
|
63
63
|
|
64
|
+
DUCKDB_API static void FormatSerialize(const BaseStatistics &stats, FormatSerializer &serializer);
|
65
|
+
DUCKDB_API static BaseStatistics FormatDeserialize(FormatDeserializer &deserializer, LogicalType type);
|
66
|
+
|
64
67
|
DUCKDB_API static string ToString(const BaseStatistics &stats);
|
65
68
|
|
66
69
|
DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
|
@@ -32,6 +32,9 @@ struct StructStats {
|
|
32
32
|
DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
|
33
33
|
DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
|
34
34
|
|
35
|
+
DUCKDB_API static void FormatSerialize(const BaseStatistics &stats, FormatSerializer &serializer);
|
36
|
+
DUCKDB_API static BaseStatistics FormatDeserialize(FormatDeserializer &deserializer, LogicalType type);
|
37
|
+
|
35
38
|
DUCKDB_API static string ToString(const BaseStatistics &stats);
|
36
39
|
|
37
40
|
DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other);
|
@@ -18,6 +18,9 @@ struct SelectionVector;
|
|
18
18
|
class Transaction;
|
19
19
|
struct TransactionData;
|
20
20
|
|
21
|
+
class FormatSerializer;
|
22
|
+
class FormatDeserializer;
|
23
|
+
|
21
24
|
enum class ChunkInfoType : uint8_t { CONSTANT_INFO, VECTOR_INFO, EMPTY_INFO };
|
22
25
|
|
23
26
|
class ChunkInfo {
|
@@ -46,6 +49,9 @@ public:
|
|
46
49
|
virtual void Serialize(Serializer &serialize) = 0;
|
47
50
|
static unique_ptr<ChunkInfo> Deserialize(Deserializer &source);
|
48
51
|
|
52
|
+
virtual void FormatSerialize(FormatSerializer &serializer) const = 0;
|
53
|
+
static unique_ptr<ChunkInfo> FormatDeserialize(FormatDeserializer &deserializer);
|
54
|
+
|
49
55
|
public:
|
50
56
|
template <class TARGET>
|
51
57
|
TARGET &Cast() {
|
@@ -85,10 +91,13 @@ public:
|
|
85
91
|
void Serialize(Serializer &serialize) override;
|
86
92
|
static unique_ptr<ChunkInfo> Deserialize(Deserializer &source);
|
87
93
|
|
94
|
+
void FormatSerialize(FormatSerializer &serializer) const override;
|
95
|
+
static unique_ptr<ChunkInfo> FormatDeserialize(FormatDeserializer &deserializer);
|
96
|
+
|
88
97
|
private:
|
89
98
|
template <class OP>
|
90
99
|
idx_t TemplatedGetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector,
|
91
|
-
idx_t max_count);
|
100
|
+
idx_t max_count) const;
|
92
101
|
};
|
93
102
|
|
94
103
|
class ChunkVectorInfo : public ChunkInfo {
|
@@ -109,7 +118,7 @@ public:
|
|
109
118
|
|
110
119
|
public:
|
111
120
|
idx_t GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector,
|
112
|
-
idx_t max_count);
|
121
|
+
idx_t max_count) const;
|
113
122
|
idx_t GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) override;
|
114
123
|
idx_t GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id,
|
115
124
|
SelectionVector &sel_vector, idx_t max_count) override;
|
@@ -130,10 +139,13 @@ public:
|
|
130
139
|
void Serialize(Serializer &serialize) override;
|
131
140
|
static unique_ptr<ChunkInfo> Deserialize(Deserializer &source);
|
132
141
|
|
142
|
+
void FormatSerialize(FormatSerializer &serializer) const override;
|
143
|
+
static unique_ptr<ChunkInfo> FormatDeserialize(FormatDeserializer &deserializer);
|
144
|
+
|
133
145
|
private:
|
134
146
|
template <class OP>
|
135
147
|
idx_t TemplatedGetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector,
|
136
|
-
idx_t max_count);
|
148
|
+
idx_t max_count) const;
|
137
149
|
};
|
138
150
|
|
139
151
|
} // namespace duckdb
|
@@ -147,6 +147,10 @@ public:
|
|
147
147
|
|
148
148
|
void NextVector(CollectionScanState &state);
|
149
149
|
|
150
|
+
// Serialization
|
151
|
+
static void FormatSerialize(RowGroupPointer &pointer, FormatSerializer &serializer);
|
152
|
+
static RowGroupPointer FormatDeserialize(FormatDeserializer &deserializer);
|
153
|
+
|
150
154
|
private:
|
151
155
|
ChunkInfo *GetChunkInfo(idx_t vector_idx);
|
152
156
|
ColumnData &GetColumn(storage_t c);
|
@@ -16,6 +16,8 @@
|
|
16
16
|
namespace duckdb {
|
17
17
|
class ColumnList;
|
18
18
|
class PersistentTableData;
|
19
|
+
class FormatSerializer;
|
20
|
+
class FormatDeserializer;
|
19
21
|
|
20
22
|
class TableStatisticsLock {
|
21
23
|
public:
|
@@ -50,6 +52,9 @@ public:
|
|
50
52
|
void Serialize(Serializer &serializer);
|
51
53
|
void Deserialize(Deserializer &source, ColumnList &columns);
|
52
54
|
|
55
|
+
void FormatSerialize(FormatSerializer &serializer);
|
56
|
+
void FormatDeserialize(FormatDeserializer &deserializer, ColumnList &columns);
|
57
|
+
|
53
58
|
private:
|
54
59
|
//! The statistics lock
|
55
60
|
mutex stats_lock;
|
@@ -23,4 +23,10 @@ public:
|
|
23
23
|
static unique_ptr<StatementVerifier> Create(const SQLStatement &statement);
|
24
24
|
};
|
25
25
|
|
26
|
+
class DeserializedStatementVerifierNoDefaultV2 : public StatementVerifier {
|
27
|
+
public:
|
28
|
+
explicit DeserializedStatementVerifierNoDefaultV2(unique_ptr<SQLStatement> statement_p);
|
29
|
+
static unique_ptr<StatementVerifier> Create(const SQLStatement &statement);
|
30
|
+
};
|
31
|
+
|
26
32
|
} // namespace duckdb
|
@@ -1372,6 +1372,18 @@ The resulting type should be destroyed with `duckdb_destroy_logical_type`.
|
|
1372
1372
|
DUCKDB_API duckdb_logical_type duckdb_create_union_type(duckdb_logical_type member_types, const char **member_names,
|
1373
1373
|
idx_t member_count);
|
1374
1374
|
|
1375
|
+
/*!
|
1376
|
+
Creates a STRUCT type from the passed member name and type arrays.
|
1377
|
+
The resulting type should be destroyed with `duckdb_destroy_logical_type`.
|
1378
|
+
|
1379
|
+
* member_types: The array of types that the struct should consist of.
|
1380
|
+
* member_names: The array of names that the struct should consist of.
|
1381
|
+
* member_count: The number of members that were specified for both arrays.
|
1382
|
+
* returns: The logical type.
|
1383
|
+
*/
|
1384
|
+
DUCKDB_API duckdb_logical_type duckdb_create_struct_type(duckdb_logical_type *member_types, const char **member_names,
|
1385
|
+
idx_t member_count);
|
1386
|
+
|
1375
1387
|
/*!
|
1376
1388
|
Creates a `duckdb_logical_type` of type decimal with the specified width and scale
|
1377
1389
|
The resulting type should be destroyed with `duckdb_destroy_logical_type`.
|
@@ -51,6 +51,28 @@ duckdb_logical_type duckdb_create_union_type(duckdb_logical_type member_types_p,
|
|
51
51
|
return reinterpret_cast<duckdb_logical_type>(mtype);
|
52
52
|
}
|
53
53
|
|
54
|
+
duckdb_logical_type duckdb_create_struct_type(duckdb_logical_type *member_types_p, const char **member_names,
|
55
|
+
idx_t member_count) {
|
56
|
+
if (!member_types_p || !member_names) {
|
57
|
+
return nullptr;
|
58
|
+
}
|
59
|
+
duckdb::LogicalType **member_types = (duckdb::LogicalType **)member_types_p;
|
60
|
+
for (idx_t i = 0; i < member_count; i++) {
|
61
|
+
if (!member_names[i] || !member_types[i]) {
|
62
|
+
return nullptr;
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
duckdb::LogicalType *mtype = new duckdb::LogicalType;
|
67
|
+
duckdb::child_list_t<duckdb::LogicalType> members;
|
68
|
+
|
69
|
+
for (idx_t i = 0; i < member_count; i++) {
|
70
|
+
members.push_back(make_pair(member_names[i], *member_types[i]));
|
71
|
+
}
|
72
|
+
*mtype = duckdb::LogicalType::STRUCT(members);
|
73
|
+
return reinterpret_cast<duckdb_logical_type>(mtype);
|
74
|
+
}
|
75
|
+
|
54
76
|
duckdb_logical_type duckdb_create_map_type(duckdb_logical_type key_type, duckdb_logical_type value_type) {
|
55
77
|
if (!key_type || !value_type) {
|
56
78
|
return nullptr;
|
@@ -1,5 +1,6 @@
|
|
1
1
|
#include "duckdb/main/client_context_file_opener.hpp"
|
2
2
|
|
3
|
+
#include "duckdb/common/file_opener.hpp"
|
3
4
|
#include "duckdb/main/client_context.hpp"
|
4
5
|
|
5
6
|
namespace duckdb {
|
@@ -8,6 +9,11 @@ bool ClientContextFileOpener::TryGetCurrentSetting(const string &key, Value &res
|
|
8
9
|
return context.TryGetCurrentSetting(key, result);
|
9
10
|
}
|
10
11
|
|
12
|
+
// LCOV_EXCL_START
|
13
|
+
bool ClientContextFileOpener::TryGetCurrentSetting(const string &key, Value &result, FileOpenerInfo &) {
|
14
|
+
return context.TryGetCurrentSetting(key, result);
|
15
|
+
}
|
16
|
+
|
11
17
|
ClientContext *FileOpener::TryGetClientContext(FileOpener *opener) {
|
12
18
|
if (!opener) {
|
13
19
|
return nullptr;
|
@@ -22,4 +28,15 @@ bool FileOpener::TryGetCurrentSetting(FileOpener *opener, const string &key, Val
|
|
22
28
|
return opener->TryGetCurrentSetting(key, result);
|
23
29
|
}
|
24
30
|
|
31
|
+
bool FileOpener::TryGetCurrentSetting(FileOpener *opener, const string &key, Value &result, FileOpenerInfo &info) {
|
32
|
+
if (!opener) {
|
33
|
+
return false;
|
34
|
+
}
|
35
|
+
return opener->TryGetCurrentSetting(key, result, info);
|
36
|
+
}
|
37
|
+
|
38
|
+
bool FileOpener::TryGetCurrentSetting(const string &key, Value &result, FileOpenerInfo &info) {
|
39
|
+
return this->TryGetCurrentSetting(key, result);
|
40
|
+
}
|
41
|
+
// LCOV_EXCL_STOP
|
25
42
|
} // namespace duckdb
|
@@ -41,6 +41,7 @@ PreservedError ClientContext::VerifyQuery(ClientContextLock &lock, const string
|
|
41
41
|
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::COPIED, stmt));
|
42
42
|
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::DESERIALIZED, stmt));
|
43
43
|
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::DESERIALIZED_V2, stmt));
|
44
|
+
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::DESERIALIZED_V2_NO_DEFAULT, stmt));
|
44
45
|
statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::UNOPTIMIZED, stmt));
|
45
46
|
prepared_statement_verifier = StatementVerifier::Create(VerificationType::PREPARED, stmt);
|
46
47
|
#ifdef DUCKDB_DEBUG_ASYNC_SINK_SOURCE
|
@@ -285,7 +285,7 @@ idx_t CGroupBandwidthQuota(idx_t physical_cores, FileSystem &fs) {
|
|
285
285
|
}
|
286
286
|
}
|
287
287
|
|
288
|
-
idx_t GetSystemMaxThreadsInternal(FileSystem &fs) {
|
288
|
+
idx_t DBConfig::GetSystemMaxThreads(FileSystem &fs) {
|
289
289
|
#ifndef DUCKDB_NO_THREADS
|
290
290
|
idx_t physical_cores = std::thread::hardware_concurrency();
|
291
291
|
#ifdef __linux__
|
@@ -301,7 +301,7 @@ idx_t GetSystemMaxThreadsInternal(FileSystem &fs) {
|
|
301
301
|
|
302
302
|
void DBConfig::SetDefaultMaxThreads() {
|
303
303
|
#ifndef DUCKDB_NO_THREADS
|
304
|
-
options.maximum_threads = GetSystemMaxThreadsInternal(*file_system);
|
304
|
+
options.maximum_threads = GetSystemMaxThreads(*file_system);
|
305
305
|
#else
|
306
306
|
options.maximum_threads = 1;
|
307
307
|
#endif
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#include "duckdb/main/connection.hpp"
|
2
2
|
|
3
3
|
#include "duckdb/common/types/column/column_data_collection.hpp"
|
4
|
-
#include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
|
4
|
+
#include "duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp"
|
5
5
|
#include "duckdb/function/table/read_csv.hpp"
|
6
6
|
#include "duckdb/main/appender.hpp"
|
7
7
|
#include "duckdb/main/client_context.hpp"
|
@@ -219,11 +219,11 @@ shared_ptr<Relation> Connection::Values(const string &values, const vector<strin
|
|
219
219
|
}
|
220
220
|
|
221
221
|
shared_ptr<Relation> Connection::ReadCSV(const string &csv_file) {
|
222
|
-
|
222
|
+
CSVReaderOptions options;
|
223
223
|
return ReadCSV(csv_file, options);
|
224
224
|
}
|
225
225
|
|
226
|
-
shared_ptr<Relation> Connection::ReadCSV(const string &csv_file,
|
226
|
+
shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, CSVReaderOptions &options) {
|
227
227
|
options.file_path = csv_file;
|
228
228
|
options.auto_detect = true;
|
229
229
|
return make_shared<ReadCSVRelation>(context, csv_file, options);
|
@@ -1,18 +1,21 @@
|
|
1
1
|
#include "duckdb/main/relation/read_csv_relation.hpp"
|
2
|
-
|
3
|
-
#include "duckdb/
|
4
|
-
#include "duckdb/
|
5
|
-
#include "duckdb/
|
2
|
+
|
3
|
+
#include "duckdb/common/string_util.hpp"
|
4
|
+
#include "duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp"
|
5
|
+
#include "duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp"
|
6
|
+
#include "duckdb/execution/operator/scan/csv/csv_sniffer.hpp"
|
6
7
|
#include "duckdb/parser/expression/columnref_expression.hpp"
|
7
8
|
#include "duckdb/parser/expression/comparison_expression.hpp"
|
8
9
|
#include "duckdb/parser/expression/constant_expression.hpp"
|
9
10
|
#include "duckdb/parser/expression/function_expression.hpp"
|
10
|
-
#include "duckdb/
|
11
|
-
#include "duckdb/
|
11
|
+
#include "duckdb/parser/expression/star_expression.hpp"
|
12
|
+
#include "duckdb/parser/query_node/select_node.hpp"
|
13
|
+
#include "duckdb/parser/tableref/basetableref.hpp"
|
14
|
+
#include "duckdb/parser/tableref/table_function_ref.hpp"
|
12
15
|
|
13
16
|
namespace duckdb {
|
14
17
|
|
15
|
-
ReadCSVRelation::ReadCSVRelation(const
|
18
|
+
ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file,
|
16
19
|
vector<ColumnDefinition> columns_p, string alias_p)
|
17
20
|
: TableFunctionRelation(context, "read_csv", {Value(csv_file)}, nullptr, false), alias(std::move(alias_p)),
|
18
21
|
auto_detect(false) {
|
@@ -31,8 +34,8 @@ ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context,
|
|
31
34
|
AddNamedParameter("columns", Value::STRUCT(std::move(column_names)));
|
32
35
|
}
|
33
36
|
|
34
|
-
ReadCSVRelation::ReadCSVRelation(const
|
35
|
-
|
37
|
+
ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file,
|
38
|
+
CSVReaderOptions options, string alias_p)
|
36
39
|
: TableFunctionRelation(context, "read_csv_auto", {Value(csv_file)}, nullptr, false), alias(std::move(alias_p)),
|
37
40
|
auto_detect(true) {
|
38
41
|
|
@@ -42,10 +45,13 @@ ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context,
|
|
42
45
|
|
43
46
|
// Force auto_detect for this constructor
|
44
47
|
options.auto_detect = true;
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
48
|
+
auto bm_file_handle = BaseCSVReader::OpenCSV(*context, options);
|
49
|
+
auto buffer_manager = make_shared<CSVBufferManager>(*context, std::move(bm_file_handle), options);
|
50
|
+
CSVStateMachineCache state_machine_cache;
|
51
|
+
CSVSniffer sniffer(options, buffer_manager, state_machine_cache);
|
52
|
+
auto sniffer_result = sniffer.SniffCSV();
|
53
|
+
auto &types = sniffer_result.return_types;
|
54
|
+
auto &names = sniffer_result.names;
|
49
55
|
for (idx_t i = 0; i < types.size(); i++) {
|
50
56
|
columns.emplace_back(names[i], types[i]);
|
51
57
|
}
|
@@ -51,7 +51,7 @@ private:
|
|
51
51
|
//! Debugging state: number of times blocked
|
52
52
|
int debug_blocked_count = 0;
|
53
53
|
//! Number of times the Finalize will block before actually returning data
|
54
|
-
int debug_blocked_target_count =
|
54
|
+
int debug_blocked_target_count = 10;
|
55
55
|
#endif
|
56
56
|
};
|
57
57
|
|
@@ -2,8 +2,6 @@
|
|
2
2
|
|
3
3
|
#include "duckdb/common/limits.hpp"
|
4
4
|
#include "duckdb/common/field_writer.hpp"
|
5
|
-
#include "duckdb/common/serializer/format_serializer.hpp"
|
6
|
-
#include "duckdb/common/serializer/format_deserializer.hpp"
|
7
5
|
|
8
6
|
namespace duckdb {
|
9
7
|
|
@@ -160,12 +158,6 @@ void PivotColumnEntry::Serialize(Serializer &serializer) const {
|
|
160
158
|
writer.Finalize();
|
161
159
|
}
|
162
160
|
|
163
|
-
void PivotColumnEntry::FormatSerialize(FormatSerializer &serializer) const {
|
164
|
-
serializer.WriteProperty(100, "values", values);
|
165
|
-
serializer.WriteOptionalProperty(101, "star_expr", star_expr);
|
166
|
-
serializer.WriteProperty(102, "alias", alias);
|
167
|
-
}
|
168
|
-
|
169
161
|
PivotColumnEntry PivotColumnEntry::Deserialize(Deserializer &source) {
|
170
162
|
PivotColumnEntry result;
|
171
163
|
FieldReader reader(source);
|
@@ -176,14 +168,6 @@ PivotColumnEntry PivotColumnEntry::Deserialize(Deserializer &source) {
|
|
176
168
|
return result;
|
177
169
|
}
|
178
170
|
|
179
|
-
PivotColumnEntry PivotColumnEntry::FormatDeserialize(FormatDeserializer &source) {
|
180
|
-
PivotColumnEntry result;
|
181
|
-
source.ReadProperty(100, "values", result.values);
|
182
|
-
source.ReadOptionalProperty(101, "star_expr", result.star_expr);
|
183
|
-
source.ReadProperty(102, "alias", result.alias);
|
184
|
-
return result;
|
185
|
-
}
|
186
|
-
|
187
171
|
//===--------------------------------------------------------------------===//
|
188
172
|
// PivotRef
|
189
173
|
//===--------------------------------------------------------------------===//
|
@@ -5,7 +5,7 @@
|
|
5
5
|
#include "duckdb/common/bind_helpers.hpp"
|
6
6
|
#include "duckdb/common/filename_pattern.hpp"
|
7
7
|
#include "duckdb/common/local_file_system.hpp"
|
8
|
-
#include "duckdb/execution/operator/
|
8
|
+
#include "duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp"
|
9
9
|
#include "duckdb/function/table/read_csv.hpp"
|
10
10
|
#include "duckdb/main/client_context.hpp"
|
11
11
|
#include "duckdb/main/database.hpp"
|