duckdb 0.8.2-dev2399.0 → 0.8.2-dev2669.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +1 -0
- package/package.json +1 -1
- package/src/duckdb/extension/icu/icu-datepart.cpp +3 -3
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
- package/src/duckdb/src/catalog/default/default_functions.cpp +5 -0
- package/src/duckdb/src/common/enum_util.cpp +35 -1
- package/src/duckdb/src/common/http_state.cpp +78 -0
- package/src/duckdb/src/core_functions/function_list.cpp +2 -2
- package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +314 -82
- package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
- package/src/duckdb/src/execution/index/art/art.cpp +43 -31
- package/src/duckdb/src/execution/index/art/leaf.cpp +47 -33
- package/src/duckdb/src/execution/index/art/node.cpp +31 -24
- package/src/duckdb/src/execution/index/art/prefix.cpp +100 -16
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +54 -31
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +32 -15
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
- package/src/duckdb/src/function/table/arrow.cpp +95 -92
- package/src/duckdb/src/function/table/arrow_conversion.cpp +45 -68
- package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
- package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -1
- package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +4 -4
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +7 -5
- package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -6
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +6 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +9 -11
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +8 -1
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +15 -14
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
- package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +20 -3
- package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +17 -1
- package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -3
- package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +20 -5
- package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +3 -3
- package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +1 -1
- package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
- package/src/duckdb/src/include/duckdb.h +16 -0
- package/src/duckdb/src/main/capi/pending-c.cpp +6 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +52 -4
- package/src/duckdb/src/main/client_context.cpp +27 -17
- package/src/duckdb/src/main/client_verify.cpp +17 -0
- package/src/duckdb/src/main/extension/extension_helper.cpp +2 -1
- package/src/duckdb/src/main/prepared_statement.cpp +38 -11
- package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
- package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -7
- package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
- package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
- package/src/duckdb/src/parser/transformer.cpp +27 -9
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +13 -13
- package/src/duckdb/src/planner/planner.cpp +7 -6
- package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_expression.cpp +3 -3
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +2 -2
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12855 -12282
- package/src/duckdb/ub_src_common.cpp +2 -0
- package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
@@ -37,6 +37,14 @@ namespace duckdb {
|
|
37
37
|
#define DUCKDB_EXPLICIT_FALLTHROUGH
|
38
38
|
#endif
|
39
39
|
|
40
|
+
template <class... T>
|
41
|
+
struct AlwaysFalse {
|
42
|
+
static constexpr bool value = false;
|
43
|
+
};
|
44
|
+
|
45
|
+
template<typename T>
|
46
|
+
using reference = std::reference_wrapper<T>;
|
47
|
+
|
40
48
|
template<class _Tp, bool SAFE = true>
|
41
49
|
struct __unique_if
|
42
50
|
{
|
@@ -193,9 +201,6 @@ void AssignSharedPointer(shared_ptr<T> &target, const shared_ptr<T> &source) {
|
|
193
201
|
}
|
194
202
|
}
|
195
203
|
|
196
|
-
template<typename T>
|
197
|
-
using reference = std::reference_wrapper<T>;
|
198
|
-
|
199
204
|
template<typename T>
|
200
205
|
using const_reference = std::reference_wrapper<const T>;
|
201
206
|
|
@@ -16,51 +16,84 @@
|
|
16
16
|
|
17
17
|
namespace duckdb {
|
18
18
|
|
19
|
-
|
19
|
+
class CachedFileHandle;
|
20
|
+
|
21
|
+
//! Represents a file that is intended to be fully downloaded, then used in parallel by multiple threads
|
22
|
+
class CachedFile : public std::enable_shared_from_this<CachedFile> {
|
23
|
+
friend class CachedFileHandle;
|
24
|
+
|
25
|
+
public:
|
26
|
+
unique_ptr<CachedFileHandle> GetHandle() {
|
27
|
+
auto this_ptr = shared_from_this();
|
28
|
+
return make_uniq<CachedFileHandle>(this_ptr);
|
29
|
+
}
|
30
|
+
|
31
|
+
private:
|
20
32
|
//! Cached Data
|
21
33
|
shared_ptr<char> data;
|
22
34
|
//! Data capacity
|
23
35
|
uint64_t capacity = 0;
|
24
|
-
//!
|
25
|
-
|
36
|
+
//! Lock for initializing the file
|
37
|
+
mutex lock;
|
38
|
+
//! When initialized is set to true, the file is safe for parallel reading without holding the lock
|
39
|
+
atomic<bool> initialized = {false};
|
40
|
+
};
|
41
|
+
|
42
|
+
//! Handle to a CachedFile
|
43
|
+
class CachedFileHandle {
|
44
|
+
public:
|
45
|
+
explicit CachedFileHandle(shared_ptr<CachedFile> &file_p);
|
46
|
+
|
47
|
+
//! allocate a buffer for the file
|
48
|
+
void AllocateBuffer(idx_t size);
|
49
|
+
//! Indicate the file is fully downloaded and safe for parallel reading without lock
|
50
|
+
void SetInitialized();
|
51
|
+
//! Grow buffer to new size, copying over `bytes_to_copy` to the new buffer
|
52
|
+
void GrowBuffer(idx_t new_capacity, idx_t bytes_to_copy);
|
53
|
+
//! Write to the buffer
|
54
|
+
void Write(const char *buffer, idx_t length, idx_t offset = 0);
|
55
|
+
|
56
|
+
bool Initialized() {
|
57
|
+
return file->initialized;
|
58
|
+
}
|
59
|
+
const char *GetData() {
|
60
|
+
return file->data.get();
|
61
|
+
}
|
62
|
+
uint64_t GetCapacity() {
|
63
|
+
return file->capacity;
|
64
|
+
}
|
65
|
+
|
66
|
+
private:
|
67
|
+
unique_ptr<lock_guard<mutex>> lock;
|
68
|
+
shared_ptr<CachedFile> file;
|
26
69
|
};
|
27
70
|
|
28
71
|
class HTTPState {
|
29
72
|
public:
|
73
|
+
//! Reset all counters and cached files
|
74
|
+
void Reset();
|
75
|
+
//! Get cache entry, create if not exists
|
76
|
+
shared_ptr<CachedFile> &GetCachedFile(const string &path);
|
77
|
+
//! Helper function to get the HTTP state
|
78
|
+
static shared_ptr<HTTPState> TryGetState(FileOpener *opener);
|
79
|
+
|
80
|
+
bool IsEmpty() {
|
81
|
+
return head_count == 0 && get_count == 0 && put_count == 0 && post_count == 0 && total_bytes_received == 0 &&
|
82
|
+
total_bytes_sent == 0;
|
83
|
+
}
|
84
|
+
|
30
85
|
atomic<idx_t> head_count {0};
|
31
86
|
atomic<idx_t> get_count {0};
|
32
87
|
atomic<idx_t> put_count {0};
|
33
88
|
atomic<idx_t> post_count {0};
|
34
89
|
atomic<idx_t> total_bytes_received {0};
|
35
90
|
atomic<idx_t> total_bytes_sent {0};
|
91
|
+
|
92
|
+
private:
|
36
93
|
//! Mutex to lock when getting the cached file(Parallel Only)
|
37
94
|
mutex cached_files_mutex;
|
38
95
|
//! In case of fully downloading the file, the cached files of this query
|
39
|
-
unordered_map<string, CachedFile
|
40
|
-
|
41
|
-
void Reset() {
|
42
|
-
head_count = 0;
|
43
|
-
get_count = 0;
|
44
|
-
put_count = 0;
|
45
|
-
post_count = 0;
|
46
|
-
total_bytes_received = 0;
|
47
|
-
total_bytes_sent = 0;
|
48
|
-
cached_files.clear();
|
49
|
-
}
|
50
|
-
|
51
|
-
//! helper function to get the HTTP
|
52
|
-
static shared_ptr<HTTPState> TryGetState(FileOpener *opener) {
|
53
|
-
auto client_context = FileOpener::TryGetClientContext(opener);
|
54
|
-
if (client_context) {
|
55
|
-
return client_context->client_data->http_state;
|
56
|
-
}
|
57
|
-
return nullptr;
|
58
|
-
}
|
59
|
-
|
60
|
-
bool IsEmpty() {
|
61
|
-
return head_count == 0 && get_count == 0 && put_count == 0 && post_count == 0 && total_bytes_received == 0 &&
|
62
|
-
total_bytes_sent == 0;
|
63
|
-
}
|
96
|
+
unordered_map<string, shared_ptr<CachedFile>> cached_files;
|
64
97
|
};
|
65
98
|
|
66
99
|
} // namespace duckdb
|
@@ -177,7 +177,10 @@ public:
|
|
177
177
|
template <class T>
|
178
178
|
T GetValue() const;
|
179
179
|
template <class T>
|
180
|
-
static Value CreateValue(T value)
|
180
|
+
static Value CreateValue(T value) {
|
181
|
+
static_assert(AlwaysFalse<T>::value, "No specialization exists for this type");
|
182
|
+
return Value(nullptr);
|
183
|
+
}
|
181
184
|
// Returns the internal value. Unlike GetValue(), this method does not perform casting, and assumes T matches the
|
182
185
|
// type of the value. Only use this if you know what you are doing.
|
183
186
|
template <class T>
|
@@ -104,11 +104,11 @@ struct ListPackFun {
|
|
104
104
|
|
105
105
|
struct ListSliceFun {
|
106
106
|
static constexpr const char *Name = "list_slice";
|
107
|
-
static constexpr const char *Parameters = "list,begin,end";
|
108
|
-
static constexpr const char *Description = "Extract a sublist using slice conventions.
|
109
|
-
static constexpr const char *Example = "list_slice(l, 2,
|
107
|
+
static constexpr const char *Parameters = "list,begin,end[,step]";
|
108
|
+
static constexpr const char *Description = "Extract a sublist using slice conventions. Negative values are accepted.";
|
109
|
+
static constexpr const char *Example = "list_slice(l, 2, 4)";
|
110
110
|
|
111
|
-
static
|
111
|
+
static ScalarFunctionSet GetFunctions();
|
112
112
|
};
|
113
113
|
|
114
114
|
struct ArraySliceFun {
|
@@ -35,14 +35,16 @@ public:
|
|
35
35
|
//! Constructs an ART
|
36
36
|
ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
|
37
37
|
const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
|
38
|
-
AttachedDatabase &db, const
|
39
|
-
const idx_t block_offset = DConstants::INVALID_INDEX);
|
38
|
+
AttachedDatabase &db, const shared_ptr<vector<FixedSizeAllocator>> &allocators_ptr = nullptr,
|
39
|
+
const idx_t block_id = DConstants::INVALID_INDEX, const idx_t block_offset = DConstants::INVALID_INDEX);
|
40
40
|
~ART() override;
|
41
41
|
|
42
42
|
//! Root of the tree
|
43
43
|
unique_ptr<Node> tree;
|
44
44
|
//! Fixed-size allocators holding the ART nodes
|
45
|
-
vector<
|
45
|
+
shared_ptr<vector<FixedSizeAllocator>> allocators;
|
46
|
+
//! True, if the ART owns its data
|
47
|
+
bool owns_data;
|
46
48
|
|
47
49
|
public:
|
48
50
|
//! Initialize a single predicate scan on the index with the given expression and column IDs
|
@@ -102,12 +104,12 @@ public:
|
|
102
104
|
|
103
105
|
//! Find the node with a matching key, or return nullptr if not found
|
104
106
|
Node Lookup(Node node, const ARTKey &key, idx_t depth);
|
107
|
+
//! Insert a key into the tree
|
108
|
+
bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
|
105
109
|
|
106
110
|
private:
|
107
111
|
//! Insert a row ID into a leaf
|
108
112
|
bool InsertToLeaf(Node &leaf, const row_t &row_id);
|
109
|
-
//! Insert a key into the tree
|
110
|
-
bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
|
111
113
|
//! Erase a key from the tree (if a leaf has more than one value) or erase the leaf itself
|
112
114
|
void Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
|
113
115
|
|
@@ -49,9 +49,9 @@ public:
|
|
49
49
|
return *Node::GetAllocator(art, NType::LEAF).Get<Leaf>(ptr);
|
50
50
|
}
|
51
51
|
|
52
|
-
//! Initializes a merge by incrementing the buffer IDs of the leaf
|
52
|
+
//! Initializes a merge by incrementing the buffer IDs of the leaf (chain)
|
53
53
|
static void InitializeMerge(ART &art, Node &node, const ARTFlags &flags);
|
54
|
-
//! Merge leaves and free all copied leaf nodes
|
54
|
+
//! Merge leaves (chains) and free all copied leaf nodes
|
55
55
|
static void Merge(ART &art, Node &l_node, Node &r_node);
|
56
56
|
|
57
57
|
//! Insert a row ID into a leaf
|
@@ -66,15 +66,15 @@ public:
|
|
66
66
|
//! Returns whether the leaf contains the row ID
|
67
67
|
static bool ContainsRowId(ART &art, Node &node, const row_t row_id);
|
68
68
|
|
69
|
-
//! Returns the string representation of the leaf, or only traverses and verifies the leaf
|
69
|
+
//! Returns the string representation of the leaf (chain), or only traverses and verifies the leaf (chain)
|
70
70
|
static string VerifyAndToString(ART &art, Node &node);
|
71
71
|
|
72
|
-
//! Serialize the leaf
|
72
|
+
//! Serialize the leaf (chain)
|
73
73
|
static BlockPointer Serialize(ART &art, Node &node, MetaBlockWriter &writer);
|
74
|
-
//! Deserialize the leaf
|
74
|
+
//! Deserialize the leaf (chain)
|
75
75
|
static void Deserialize(ART &art, Node &node, MetaBlockReader &reader);
|
76
76
|
|
77
|
-
//! Vacuum the leaf
|
77
|
+
//! Vacuum the leaf (chain)
|
78
78
|
static void Vacuum(ART &art, Node &node);
|
79
79
|
|
80
80
|
private:
|
@@ -180,6 +180,12 @@ public:
|
|
180
180
|
data = 0;
|
181
181
|
}
|
182
182
|
|
183
|
+
//! Adds an idx_t to a buffer ID, the rightmost 32 bits contain the buffer ID
|
184
|
+
inline void AddToBufferID(const idx_t summand) {
|
185
|
+
D_ASSERT(summand < NumericLimits<uint32_t>().Maximum());
|
186
|
+
data += summand;
|
187
|
+
}
|
188
|
+
|
183
189
|
//! Comparison operator
|
184
190
|
inline bool operator==(const Node &node) const {
|
185
191
|
return data == node.data;
|
@@ -42,10 +42,8 @@ public:
|
|
42
42
|
return *Node::GetAllocator(art, NType::PREFIX).Get<Prefix>(ptr);
|
43
43
|
}
|
44
44
|
|
45
|
-
//! Initializes a merge by incrementing the buffer ID of the child node(s)
|
46
|
-
|
47
|
-
ptr.InitializeMerge(art, flags);
|
48
|
-
}
|
45
|
+
//! Initializes a merge by incrementing the buffer ID of the prefix and its child node(s)
|
46
|
+
static void InitializeMerge(ART &art, Node &node, const ARTFlags &flags);
|
49
47
|
|
50
48
|
//! Appends a byte and a child_prefix to prefix. If there is no prefix, than it pushes the
|
51
49
|
//! byte on top of child_prefix. If there is no child_prefix, then it creates a new
|
@@ -75,15 +73,13 @@ public:
|
|
75
73
|
//! Returns the string representation of the node, or only traverses and verifies the node and its subtree
|
76
74
|
static string VerifyAndToString(ART &art, Node &node, const bool only_verify);
|
77
75
|
|
78
|
-
//! Serialize this node
|
79
|
-
BlockPointer Serialize(ART &art, MetaBlockWriter &writer);
|
80
|
-
//! Deserialize this node
|
81
|
-
void Deserialize(MetaBlockReader &reader);
|
76
|
+
//! Serialize this node and all subsequent nodes
|
77
|
+
static BlockPointer Serialize(ART &art, Node &node, MetaBlockWriter &writer);
|
78
|
+
//! Deserialize this node and all subsequent prefix nodes
|
79
|
+
static void Deserialize(ART &art, Node &node, MetaBlockReader &reader);
|
82
80
|
|
83
81
|
//! Vacuum the child of the node
|
84
|
-
|
85
|
-
ptr.Vacuum(art, flags);
|
86
|
-
}
|
82
|
+
static void Vacuum(ART &art, Node &node, const ARTFlags &flags);
|
87
83
|
|
88
84
|
private:
|
89
85
|
//! Appends the byte to this prefix node, or creates a subsequent prefix node,
|
@@ -92,6 +88,8 @@ private:
|
|
92
88
|
//! Appends the other_prefix and all its subsequent prefix nodes to this prefix node.
|
93
89
|
//! Also frees all copied/appended nodes
|
94
90
|
void Append(ART &art, Node other_prefix);
|
91
|
+
//! Get the total count of bytes in the chain of prefixes, with the node reference pointing to first non-prefix node
|
92
|
+
static idx_t TotalCount(ART &art, reference<Node> &node);
|
95
93
|
};
|
96
94
|
|
97
95
|
} // namespace duckdb
|
@@ -27,7 +27,7 @@ public:
|
|
27
27
|
public:
|
28
28
|
PhysicalCreateIndex(LogicalOperator &op, TableCatalogEntry &table, const vector<column_t> &column_ids,
|
29
29
|
unique_ptr<CreateIndexInfo> info, vector<unique_ptr<Expression>> unbound_expressions,
|
30
|
-
idx_t estimated_cardinality);
|
30
|
+
idx_t estimated_cardinality, const bool sorted);
|
31
31
|
|
32
32
|
//! The table to create the index for
|
33
33
|
DuckTableEntry &table;
|
@@ -37,6 +37,8 @@ public:
|
|
37
37
|
unique_ptr<CreateIndexInfo> info;
|
38
38
|
//! Unbound expressions to be used in the optimizer
|
39
39
|
vector<unique_ptr<Expression>> unbound_expressions;
|
40
|
+
//! Whether the pipeline sorts the data prior to index creation
|
41
|
+
const bool sorted;
|
40
42
|
|
41
43
|
public:
|
42
44
|
//! Source interface, NOP for this operator
|
@@ -52,6 +54,11 @@ public:
|
|
52
54
|
//! Sink interface, global sink state
|
53
55
|
unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
|
54
56
|
|
57
|
+
//! Sink for unsorted data: insert iteratively
|
58
|
+
SinkResultType SinkUnsorted(Vector &row_identifiers, OperatorSinkInput &input) const;
|
59
|
+
//! Sink for sorted data: build + merge
|
60
|
+
SinkResultType SinkSorted(Vector &row_identifiers, OperatorSinkInput &input) const;
|
61
|
+
|
55
62
|
SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override;
|
56
63
|
SinkCombineResultType Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const override;
|
57
64
|
SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
|
@@ -0,0 +1,99 @@
|
|
1
|
+
//===----------------------------------------------------------------------===//
|
2
|
+
// DuckDB
|
3
|
+
//
|
4
|
+
// duckdb/function/table/arrow_duck_schema.hpp
|
5
|
+
//
|
6
|
+
//
|
7
|
+
//===----------------------------------------------------------------------===//
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "duckdb/common/types.hpp"
|
12
|
+
#include "duckdb/common/unordered_map.hpp"
|
13
|
+
#include "duckdb/common/vector.hpp"
|
14
|
+
#include "duckdb/common/unique_ptr.hpp"
|
15
|
+
|
16
|
+
namespace duckdb {
|
17
|
+
//===--------------------------------------------------------------------===//
|
18
|
+
// Arrow Variable Size Types
|
19
|
+
//===--------------------------------------------------------------------===//
|
20
|
+
enum class ArrowVariableSizeType : uint8_t { FIXED_SIZE = 0, NORMAL = 1, SUPER_SIZE = 2 };
|
21
|
+
|
22
|
+
//===--------------------------------------------------------------------===//
|
23
|
+
// Arrow Time/Date Types
|
24
|
+
//===--------------------------------------------------------------------===//
|
25
|
+
enum class ArrowDateTimeType : uint8_t {
|
26
|
+
MILLISECONDS = 0,
|
27
|
+
MICROSECONDS = 1,
|
28
|
+
NANOSECONDS = 2,
|
29
|
+
SECONDS = 3,
|
30
|
+
DAYS = 4,
|
31
|
+
MONTHS = 5,
|
32
|
+
MONTH_DAY_NANO = 6
|
33
|
+
};
|
34
|
+
|
35
|
+
class ArrowType {
|
36
|
+
public:
|
37
|
+
//! From a DuckDB type
|
38
|
+
ArrowType(LogicalType type_p)
|
39
|
+
: type(std::move(type_p)), size_type(ArrowVariableSizeType::NORMAL),
|
40
|
+
date_time_precision(ArrowDateTimeType::DAYS) {};
|
41
|
+
|
42
|
+
//! From a DuckDB type + fixed_size
|
43
|
+
ArrowType(LogicalType type_p, idx_t fixed_size_p)
|
44
|
+
: type(std::move(type_p)), size_type(ArrowVariableSizeType::FIXED_SIZE),
|
45
|
+
date_time_precision(ArrowDateTimeType::DAYS), fixed_size(fixed_size_p) {};
|
46
|
+
|
47
|
+
//! From a DuckDB type + variable size type
|
48
|
+
ArrowType(LogicalType type_p, ArrowVariableSizeType size_type_p)
|
49
|
+
: type(std::move(type_p)), size_type(size_type_p), date_time_precision(ArrowDateTimeType::DAYS) {};
|
50
|
+
|
51
|
+
//! From a DuckDB type + datetime type
|
52
|
+
ArrowType(LogicalType type_p, ArrowDateTimeType date_time_precision_p)
|
53
|
+
: type(std::move(type_p)), size_type(ArrowVariableSizeType::NORMAL),
|
54
|
+
date_time_precision(date_time_precision_p) {};
|
55
|
+
|
56
|
+
void AddChild(unique_ptr<ArrowType> child);
|
57
|
+
|
58
|
+
void AssignChildren(vector<unique_ptr<ArrowType>> children);
|
59
|
+
|
60
|
+
const LogicalType &GetDuckType() const;
|
61
|
+
|
62
|
+
ArrowVariableSizeType GetSizeType() const;
|
63
|
+
|
64
|
+
idx_t FixedSize() const;
|
65
|
+
|
66
|
+
void SetDictionary(unique_ptr<ArrowType> dictionary);
|
67
|
+
|
68
|
+
ArrowDateTimeType GetDateTimeType() const;
|
69
|
+
|
70
|
+
const ArrowType &GetDictionary() const;
|
71
|
+
|
72
|
+
const ArrowType &operator[](idx_t index) const;
|
73
|
+
|
74
|
+
private:
|
75
|
+
LogicalType type;
|
76
|
+
//! If we have a nested type, their children's type.
|
77
|
+
vector<unique_ptr<ArrowType>> children;
|
78
|
+
//! If its a variable size type (e.g., strings, blobs, lists) holds which type it is
|
79
|
+
ArrowVariableSizeType size_type;
|
80
|
+
//! If this is a date/time holds its precision
|
81
|
+
ArrowDateTimeType date_time_precision;
|
82
|
+
//! Only for size types with fixed size
|
83
|
+
idx_t fixed_size = 0;
|
84
|
+
//! Hold the optional type if the array is a dictionary
|
85
|
+
unique_ptr<ArrowType> dictionary_type;
|
86
|
+
};
|
87
|
+
|
88
|
+
using arrow_column_map_t = unordered_map<idx_t, unique_ptr<ArrowType>>;
|
89
|
+
|
90
|
+
struct ArrowTableType {
|
91
|
+
public:
|
92
|
+
void AddColumn(idx_t index, unique_ptr<ArrowType> type);
|
93
|
+
const arrow_column_map_t &GetColumns() const;
|
94
|
+
|
95
|
+
private:
|
96
|
+
arrow_column_map_t arrow_convert_data;
|
97
|
+
};
|
98
|
+
|
99
|
+
} // namespace duckdb
|
@@ -16,25 +16,9 @@
|
|
16
16
|
#include "duckdb/common/thread.hpp"
|
17
17
|
#include "duckdb/common/unordered_map.hpp"
|
18
18
|
#include "duckdb/function/built_in_functions.hpp"
|
19
|
+
#include "duckdb/function/table/arrow/arrow_duck_schema.hpp"
|
19
20
|
|
20
21
|
namespace duckdb {
|
21
|
-
//===--------------------------------------------------------------------===//
|
22
|
-
// Arrow Variable Size Types
|
23
|
-
//===--------------------------------------------------------------------===//
|
24
|
-
enum class ArrowVariableSizeType : uint8_t { FIXED_SIZE = 0, NORMAL = 1, SUPER_SIZE = 2 };
|
25
|
-
|
26
|
-
//===--------------------------------------------------------------------===//
|
27
|
-
// Arrow Time/Date Types
|
28
|
-
//===--------------------------------------------------------------------===//
|
29
|
-
enum class ArrowDateTimeType : uint8_t {
|
30
|
-
MILLISECONDS = 0,
|
31
|
-
MICROSECONDS = 1,
|
32
|
-
NANOSECONDS = 2,
|
33
|
-
SECONDS = 3,
|
34
|
-
DAYS = 4,
|
35
|
-
MONTHS = 5,
|
36
|
-
MONTH_DAY_NANO = 6
|
37
|
-
};
|
38
22
|
|
39
23
|
struct ArrowInterval {
|
40
24
|
int32_t months;
|
@@ -46,18 +30,6 @@ struct ArrowInterval {
|
|
46
30
|
}
|
47
31
|
};
|
48
32
|
|
49
|
-
struct ArrowConvertData {
|
50
|
-
ArrowConvertData(LogicalType type) : dictionary_type(type) {};
|
51
|
-
ArrowConvertData() {};
|
52
|
-
|
53
|
-
//! Hold type of dictionary
|
54
|
-
LogicalType dictionary_type;
|
55
|
-
//! If its a variable size type (e.g., strings, blobs, lists) holds which type it is
|
56
|
-
vector<pair<ArrowVariableSizeType, idx_t>> variable_sz_type;
|
57
|
-
//! If this is a date/time holds its precision
|
58
|
-
vector<ArrowDateTimeType> date_time_precision;
|
59
|
-
};
|
60
|
-
|
61
33
|
struct ArrowProjectedColumns {
|
62
34
|
unordered_map<idx_t, string> projection_map;
|
63
35
|
vector<string> columns;
|
@@ -73,11 +45,10 @@ typedef unique_ptr<ArrowArrayStreamWrapper> (*stream_factory_produce_t)(uintptr_
|
|
73
45
|
typedef void (*stream_factory_get_schema_t)(uintptr_t stream_factory_ptr, ArrowSchemaWrapper &schema);
|
74
46
|
|
75
47
|
struct ArrowScanFunctionData : public PyTableFunctionData {
|
48
|
+
public:
|
76
49
|
ArrowScanFunctionData(stream_factory_produce_t scanner_producer_p, uintptr_t stream_factory_ptr_p)
|
77
50
|
: lines_read(0), stream_factory_ptr(stream_factory_ptr_p), scanner_producer(scanner_producer_p) {
|
78
51
|
}
|
79
|
-
//! This holds the original list type (col_idx, [ArrowListType,size])
|
80
|
-
unordered_map<idx_t, unique_ptr<ArrowConvertData>> arrow_convert_data;
|
81
52
|
vector<LogicalType> all_types;
|
82
53
|
atomic<idx_t> lines_read;
|
83
54
|
ArrowSchemaWrapper schema_root;
|
@@ -86,6 +57,8 @@ struct ArrowScanFunctionData : public PyTableFunctionData {
|
|
86
57
|
uintptr_t stream_factory_ptr;
|
87
58
|
//! Pointer to the scanner factory produce
|
88
59
|
stream_factory_produce_t scanner_producer;
|
60
|
+
//! Arrow table data
|
61
|
+
ArrowTableType arrow_table;
|
89
62
|
};
|
90
63
|
|
91
64
|
struct ArrowScanLocalState : public LocalTableFunctionState {
|
@@ -132,8 +105,7 @@ public:
|
|
132
105
|
static unique_ptr<FunctionData> ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
|
133
106
|
vector<LogicalType> &return_types, vector<string> &names);
|
134
107
|
//! Actual conversion from Arrow to DuckDB
|
135
|
-
static void ArrowToDuckDB(ArrowScanLocalState &scan_state,
|
136
|
-
std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
|
108
|
+
static void ArrowToDuckDB(ArrowScanLocalState &scan_state, const arrow_column_map_t &arrow_convert_data,
|
137
109
|
DataChunk &output, idx_t start, bool arrow_scan_is_projected = true);
|
138
110
|
|
139
111
|
//! Get next scan state
|
@@ -172,9 +144,7 @@ protected:
|
|
172
144
|
//! Renames repeated columns and case sensitive columns
|
173
145
|
static void RenameArrowColumns(vector<string> &names);
|
174
146
|
//! Helper function to get the DuckDB logical type
|
175
|
-
static
|
176
|
-
std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
|
177
|
-
idx_t col_idx);
|
147
|
+
static unique_ptr<ArrowType> GetArrowLogicalType(ArrowSchema &schema);
|
178
148
|
};
|
179
149
|
|
180
150
|
} // namespace duckdb
|
@@ -13,6 +13,7 @@
|
|
13
13
|
#include "duckdb/common/types.hpp"
|
14
14
|
#include "duckdb/common/types/data_chunk.hpp"
|
15
15
|
#include "duckdb/main/appender.hpp"
|
16
|
+
#include "duckdb/common/case_insensitive_map.hpp"
|
16
17
|
|
17
18
|
#include <cstring>
|
18
19
|
#include <cassert>
|
@@ -30,8 +31,9 @@ struct DatabaseData {
|
|
30
31
|
};
|
31
32
|
|
32
33
|
struct PreparedStatementWrapper {
|
34
|
+
//! Map of name -> values
|
35
|
+
case_insensitive_map_t<Value> values;
|
33
36
|
unique_ptr<PreparedStatement> statement;
|
34
|
-
vector<Value> values;
|
35
37
|
};
|
36
38
|
|
37
39
|
struct ExtractStatementsWrapper {
|
@@ -47,7 +47,7 @@ struct ClientData;
|
|
47
47
|
|
48
48
|
struct PendingQueryParameters {
|
49
49
|
//! Prepared statement parameters (if any)
|
50
|
-
|
50
|
+
optional_ptr<case_insensitive_map_t<Value>> parameters;
|
51
51
|
//! Whether or not a stream result should be allowed
|
52
52
|
bool allow_stream_result = false;
|
53
53
|
};
|
@@ -135,16 +135,17 @@ public:
|
|
135
135
|
//! Create a pending query result from a prepared statement with the given name and set of parameters
|
136
136
|
//! It is possible that the prepared statement will be re-bound. This will generally happen if the catalog is
|
137
137
|
//! modified in between the prepared statement being bound and the prepared statement being run.
|
138
|
-
DUCKDB_API unique_ptr<PendingQueryResult>
|
139
|
-
|
138
|
+
DUCKDB_API unique_ptr<PendingQueryResult> PendingQuery(const string &query,
|
139
|
+
shared_ptr<PreparedStatementData> &prepared,
|
140
|
+
const PendingQueryParameters &parameters);
|
140
141
|
|
141
142
|
//! Execute a prepared statement with the given name and set of parameters
|
142
143
|
//! It is possible that the prepared statement will be re-bound. This will generally happen if the catalog is
|
143
144
|
//! modified in between the prepared statement being bound and the prepared statement being run.
|
144
145
|
DUCKDB_API unique_ptr<QueryResult> Execute(const string &query, shared_ptr<PreparedStatementData> &prepared,
|
145
|
-
|
146
|
+
case_insensitive_map_t<Value> &values, bool allow_stream_result = true);
|
146
147
|
DUCKDB_API unique_ptr<QueryResult> Execute(const string &query, shared_ptr<PreparedStatementData> &prepared,
|
147
|
-
PendingQueryParameters parameters);
|
148
|
+
const PendingQueryParameters &parameters);
|
148
149
|
|
149
150
|
//! Gets current percentage of the query's progress, returns 0 in case the progress bar is disabled.
|
150
151
|
DUCKDB_API double GetProgress();
|
@@ -198,7 +199,7 @@ private:
|
|
198
199
|
PreservedError &error);
|
199
200
|
//! Issues a query to the database and returns a Pending Query Result
|
200
201
|
unique_ptr<PendingQueryResult> PendingQueryInternal(ClientContextLock &lock, unique_ptr<SQLStatement> statement,
|
201
|
-
PendingQueryParameters parameters, bool verify = true);
|
202
|
+
const PendingQueryParameters &parameters, bool verify = true);
|
202
203
|
unique_ptr<QueryResult> ExecutePendingQueryInternal(ClientContextLock &lock, PendingQueryResult &query);
|
203
204
|
|
204
205
|
//! Parse statements from a query
|
@@ -214,18 +215,18 @@ private:
|
|
214
215
|
unique_ptr<PendingQueryResult> PendingStatementOrPreparedStatement(ClientContextLock &lock, const string &query,
|
215
216
|
unique_ptr<SQLStatement> statement,
|
216
217
|
shared_ptr<PreparedStatementData> &prepared,
|
217
|
-
PendingQueryParameters parameters);
|
218
|
+
const PendingQueryParameters &parameters);
|
218
219
|
unique_ptr<PendingQueryResult> PendingPreparedStatement(ClientContextLock &lock,
|
219
220
|
shared_ptr<PreparedStatementData> statement_p,
|
220
|
-
PendingQueryParameters parameters);
|
221
|
+
const PendingQueryParameters &parameters);
|
221
222
|
|
222
223
|
//! Internally prepare a SQL statement. Caller must hold the context_lock.
|
223
|
-
shared_ptr<PreparedStatementData>
|
224
|
-
|
225
|
-
|
224
|
+
shared_ptr<PreparedStatementData>
|
225
|
+
CreatePreparedStatement(ClientContextLock &lock, const string &query, unique_ptr<SQLStatement> statement,
|
226
|
+
optional_ptr<case_insensitive_map_t<Value>> values = nullptr);
|
226
227
|
unique_ptr<PendingQueryResult> PendingStatementInternal(ClientContextLock &lock, const string &query,
|
227
228
|
unique_ptr<SQLStatement> statement,
|
228
|
-
PendingQueryParameters parameters);
|
229
|
+
const PendingQueryParameters &parameters);
|
229
230
|
unique_ptr<QueryResult> RunStatementInternal(ClientContextLock &lock, const string &query,
|
230
231
|
unique_ptr<SQLStatement> statement, bool allow_stream_result,
|
231
232
|
bool verify = true);
|
@@ -245,11 +246,11 @@ private:
|
|
245
246
|
|
246
247
|
unique_ptr<PendingQueryResult> PendingStatementOrPreparedStatementInternal(
|
247
248
|
ClientContextLock &lock, const string &query, unique_ptr<SQLStatement> statement,
|
248
|
-
shared_ptr<PreparedStatementData> &prepared, PendingQueryParameters parameters);
|
249
|
+
shared_ptr<PreparedStatementData> &prepared, const PendingQueryParameters &parameters);
|
249
250
|
|
250
251
|
unique_ptr<PendingQueryResult> PendingQueryPreparedInternal(ClientContextLock &lock, const string &query,
|
251
252
|
shared_ptr<PreparedStatementData> &prepared,
|
252
|
-
PendingQueryParameters parameters);
|
253
|
+
const PendingQueryParameters &parameters);
|
253
254
|
|
254
255
|
unique_ptr<PendingQueryResult> PendingQueryInternal(ClientContextLock &, const shared_ptr<Relation> &relation,
|
255
256
|
bool allow_stream_result);
|