duckdb 0.7.2-dev3117.0 → 0.7.2-dev3154.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +7 -0
- package/src/duckdb/extension/parquet/parquet-extension.cpp +42 -0
- package/src/duckdb/extension/parquet/parquet_writer.cpp +23 -9
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/types/vector.cpp +4 -5
- package/src/duckdb/src/common/types/vector_buffer.cpp +1 -1
- package/src/duckdb/src/core_functions/function_list.cpp +1 -0
- package/src/duckdb/src/core_functions/scalar/map/map_concat.cpp +186 -0
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +65 -21
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +494 -0
- package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +16 -6
- package/src/duckdb/src/execution/window_segment_tree.cpp +17 -13
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/core_functions/scalar/map_functions.hpp +14 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +13 -0
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp +72 -0
- package/src/duckdb/src/include/duckdb/function/copy_function.hpp +3 -1
- package/src/duckdb/src/planner/operator/logical_delete.cpp +2 -0
- package/src/duckdb/src/planner/operator/logical_update.cpp +2 -0
- package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +1 -0
- package/src/duckdb/ub_src_core_functions_scalar_map.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/execution/operator/persistent/physical_fixed_batch_copy.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/execution/physical_operator.hpp"
|
12
|
+
#include "duckdb/parser/parsed_data/copy_info.hpp"
|
13
|
+
#include "duckdb/function/copy_function.hpp"
|
14
|
+
#include "duckdb/common/file_system.hpp"
|
15
|
+
#include "duckdb/common/filename_pattern.hpp"
|
16
|
+
|
17
|
+
namespace duckdb {
|
18
|
+
|
19
|
+
//! PhysicalFixedBatchCopy is a PhysicalOperator that exposes both the source and the sink interface
//! (IsSource() and IsSink() both return true) and is tagged FIXED_BATCH_COPY_TO_FILE.
//! NOTE(review): presumably the fixed-batch-size variant of the batch COPY ... TO file operator; the
//! implementation lives in physical_fixed_batch_copy.cpp and is not visible from this header - confirm there.
class PhysicalFixedBatchCopy : public PhysicalOperator {
|
20
|
+
public:
|
21
|
+
//! Operator type tag associated with this class
static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::FIXED_BATCH_COPY_TO_FILE;
|
22
|
+
|
23
|
+
public:
|
24
|
+
//! Constructs the operator from its types, the copy function, the bind data (ownership is transferred via
//! unique_ptr) and an estimated cardinality. file_path and use_tmp_file are not constructor parameters.
PhysicalFixedBatchCopy(vector<LogicalType> types, CopyFunction function, unique_ptr<FunctionData> bind_data,
|
25
|
+
idx_t estimated_cardinality);
|
26
|
+
|
27
|
+
//! The copy function held (by value) by this operator
CopyFunction function;
|
28
|
+
//! Bind data owned by this operator
//! NOTE(review): presumably handed to the CopyFunction callbacks - confirm in the .cpp
unique_ptr<FunctionData> bind_data;
|
29
|
+
//! NOTE(review): presumably the destination path of the copy; assigned outside the constructor - confirm
string file_path;
|
30
|
+
//! NOTE(review): presumably whether output goes to a temporary file first; has no in-class initializer
//! and is assigned outside the constructor - confirm against the planner
bool use_tmp_file;
|
31
|
+
|
32
|
+
public:
|
33
|
+
// Source interface
|
34
|
+
SourceResultType GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const override;
|
35
|
+
|
36
|
+
//! This operator always reports itself as a source
bool IsSource() const override {
|
37
|
+
return true;
|
38
|
+
}
|
39
|
+
|
40
|
+
public:
|
41
|
+
// Sink interface
|
42
|
+
SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override;
|
43
|
+
void Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const override;
|
44
|
+
SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
45
|
+
GlobalSinkState &gstate) const override;
|
46
|
+
unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
|
47
|
+
unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
|
48
|
+
void NextBatch(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p) const override;
|
49
|
+
|
50
|
+
//! This operator always requests a batch index
bool RequiresBatchIndex() const override {
|
51
|
+
return true;
|
52
|
+
}
|
53
|
+
|
54
|
+
//! This operator always reports itself as a sink
bool IsSink() const override {
|
55
|
+
return true;
|
56
|
+
}
|
57
|
+
|
58
|
+
//! This operator always reports that its sink is parallel
bool ParallelSink() const override {
|
59
|
+
return true;
|
60
|
+
}
|
61
|
+
|
62
|
+
public:
|
63
|
+
// Non-virtual helpers. All are const member functions that receive the global sink state by non-const
// reference; their bodies are defined outside this header.
//! Takes ownership of a ColumnDataCollection together with the batch index it is registered under
void AddRawBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t batch_index,
|
64
|
+
unique_ptr<ColumnDataCollection> collection) const;
|
65
|
+
//! `final` defaults to false; it is only a contextual keyword in C++, so it is a valid parameter name here
//! NOTE(review): presumably marks the last repartition pass that must flush leftover rows - confirm in the .cpp
void RepartitionBatches(ClientContext &context, GlobalSinkState &gstate_p, idx_t min_index,
|
66
|
+
bool final = false) const;
|
67
|
+
void FlushBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t min_index) const;
|
68
|
+
//! NOTE(review): the bool result presumably signals whether a task was available to run - confirm in the .cpp
bool ExecuteTask(ClientContext &context, GlobalSinkState &gstate_p) const;
|
69
|
+
void ExecuteTasks(ClientContext &context, GlobalSinkState &gstate_p) const;
|
70
|
+
SinkFinalizeType FinalFlush(ClientContext &context, GlobalSinkState &gstate_p) const;
|
71
|
+
};
|
72
|
+
} // namespace duckdb
|
@@ -97,6 +97,7 @@ typedef unique_ptr<PreparedBatchData> (*copy_prepare_batch_t)(ClientContext &con
|
|
97
97
|
unique_ptr<ColumnDataCollection> collection);
|
98
98
|
typedef void (*copy_flush_batch_t)(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate,
|
99
99
|
PreparedBatchData &batch);
|
100
|
+
typedef idx_t (*copy_desired_batch_size_t)(ClientContext &context, FunctionData &bind_data);
|
100
101
|
|
101
102
|
class CopyFunction : public Function {
|
102
103
|
public:
|
@@ -104,7 +105,7 @@ public:
|
|
104
105
|
: Function(name), plan(nullptr), copy_to_bind(nullptr), copy_to_initialize_local(nullptr),
|
105
106
|
copy_to_initialize_global(nullptr), copy_to_sink(nullptr), copy_to_combine(nullptr),
|
106
107
|
copy_to_finalize(nullptr), execution_mode(nullptr), prepare_batch(nullptr), flush_batch(nullptr),
|
107
|
-
serialize(nullptr), deserialize(nullptr), copy_from_bind(nullptr) {
|
108
|
+
desired_batch_size(nullptr), serialize(nullptr), deserialize(nullptr), copy_from_bind(nullptr) {
|
108
109
|
}
|
109
110
|
|
110
111
|
//! Plan rewrite copy function
|
@@ -120,6 +121,7 @@ public:
|
|
120
121
|
|
121
122
|
copy_prepare_batch_t prepare_batch;
|
122
123
|
copy_flush_batch_t flush_batch;
|
124
|
+
copy_desired_batch_size_t desired_batch_size;
|
123
125
|
|
124
126
|
copy_to_serialize_t serialize;
|
125
127
|
copy_to_deserialize_t deserialize;
|
@@ -15,6 +15,7 @@ void LogicalDelete::Serialize(FieldWriter &writer) const {
|
|
15
15
|
table.Serialize(writer.GetSerializer());
|
16
16
|
writer.WriteField(table_index);
|
17
17
|
writer.WriteField(return_chunk);
|
18
|
+
writer.WriteSerializableList(this->expressions);
|
18
19
|
}
|
19
20
|
|
20
21
|
unique_ptr<LogicalOperator> LogicalDelete::Deserialize(LogicalDeserializationState &state, FieldReader &reader) {
|
@@ -26,6 +27,7 @@ unique_ptr<LogicalOperator> LogicalDelete::Deserialize(LogicalDeserializationSta
|
|
26
27
|
auto table_index = reader.ReadRequired<idx_t>();
|
27
28
|
auto result = make_uniq<LogicalDelete>(table_catalog_entry, table_index);
|
28
29
|
result->return_chunk = reader.ReadRequired<bool>();
|
30
|
+
result->expressions = reader.ReadRequiredSerializableList<duckdb::Expression>(state.gstate);
|
29
31
|
return std::move(result);
|
30
32
|
}
|
31
33
|
|
@@ -17,6 +17,7 @@ void LogicalUpdate::Serialize(FieldWriter &writer) const {
|
|
17
17
|
writer.WriteIndexList<PhysicalIndex>(columns);
|
18
18
|
writer.WriteSerializableList(bound_defaults);
|
19
19
|
writer.WriteField(update_is_del_and_insert);
|
20
|
+
writer.WriteSerializableList(this->expressions);
|
20
21
|
}
|
21
22
|
|
22
23
|
unique_ptr<LogicalOperator> LogicalUpdate::Deserialize(LogicalDeserializationState &state, FieldReader &reader) {
|
@@ -31,6 +32,7 @@ unique_ptr<LogicalOperator> LogicalUpdate::Deserialize(LogicalDeserializationSta
|
|
31
32
|
result->columns = reader.ReadRequiredIndexList<PhysicalIndex>();
|
32
33
|
result->bound_defaults = reader.ReadRequiredSerializableList<Expression>(state.gstate);
|
33
34
|
result->update_is_del_and_insert = reader.ReadRequired<bool>();
|
35
|
+
result->expressions = reader.ReadRequiredSerializableList<duckdb::Expression>(state.gstate);
|
34
36
|
return std::move(result);
|
35
37
|
}
|
36
38
|
|
@@ -8,6 +8,8 @@
|
|
8
8
|
|
9
9
|
#include "src/execution/operator/persistent/csv_reader_options.cpp"
|
10
10
|
|
11
|
+
#include "src/execution/operator/persistent/physical_fixed_batch_copy.cpp"
|
12
|
+
|
11
13
|
#include "src/execution/operator/persistent/physical_batch_copy_to_file.cpp"
|
12
14
|
|
13
15
|
#include "src/execution/operator/persistent/physical_batch_insert.cpp"
|