duckdb 0.8.2-dev2399.0 → 0.8.2-dev2669.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu-datepart.cpp +3 -3
  4. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +1 -1
  5. package/src/duckdb/src/catalog/default/default_functions.cpp +5 -0
  6. package/src/duckdb/src/common/enum_util.cpp +35 -1
  7. package/src/duckdb/src/common/http_state.cpp +78 -0
  8. package/src/duckdb/src/core_functions/function_list.cpp +2 -2
  9. package/src/duckdb/src/core_functions/scalar/list/array_slice.cpp +314 -82
  10. package/src/duckdb/src/execution/expression_executor/execute_parameter.cpp +2 -2
  11. package/src/duckdb/src/execution/index/art/art.cpp +43 -31
  12. package/src/duckdb/src/execution/index/art/leaf.cpp +47 -33
  13. package/src/duckdb/src/execution/index/art/node.cpp +31 -24
  14. package/src/duckdb/src/execution/index/art/prefix.cpp +100 -16
  15. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +54 -31
  16. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +32 -15
  17. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +57 -0
  18. package/src/duckdb/src/function/table/arrow.cpp +95 -92
  19. package/src/duckdb/src/function/table/arrow_conversion.cpp +45 -68
  20. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  21. package/src/duckdb/src/include/duckdb/common/case_insensitive_map.hpp +1 -0
  22. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +8 -0
  23. package/src/duckdb/src/include/duckdb/common/helper.hpp +8 -3
  24. package/src/duckdb/src/include/duckdb/common/http_state.hpp +61 -28
  25. package/src/duckdb/src/include/duckdb/common/types/value.hpp +4 -1
  26. package/src/duckdb/src/include/duckdb/core_functions/scalar/list_functions.hpp +4 -4
  27. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +7 -5
  28. package/src/duckdb/src/include/duckdb/execution/index/art/leaf.hpp +6 -6
  29. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +6 -0
  30. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +9 -11
  31. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_index.hpp +8 -1
  32. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +99 -0
  33. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +6 -36
  34. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +3 -1
  35. package/src/duckdb/src/include/duckdb/main/client_context.hpp +15 -14
  36. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +73 -5
  37. package/src/duckdb/src/include/duckdb/main/prepared_statement_data.hpp +6 -6
  38. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +20 -3
  39. package/src/duckdb/src/include/duckdb/parser/expression/parameter_expression.hpp +17 -1
  40. package/src/duckdb/src/include/duckdb/parser/statement/execute_statement.hpp +1 -1
  41. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +5 -3
  42. package/src/duckdb/src/include/duckdb/planner/bound_parameter_map.hpp +2 -1
  43. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_data.hpp +20 -5
  44. package/src/duckdb/src/include/duckdb/planner/expression/bound_parameter_expression.hpp +3 -3
  45. package/src/duckdb/src/include/duckdb/planner/planner.hpp +4 -3
  46. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +1 -1
  47. package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +1 -1
  48. package/src/duckdb/src/include/duckdb.h +16 -0
  49. package/src/duckdb/src/main/capi/pending-c.cpp +6 -0
  50. package/src/duckdb/src/main/capi/prepared-c.cpp +52 -4
  51. package/src/duckdb/src/main/client_context.cpp +27 -17
  52. package/src/duckdb/src/main/client_verify.cpp +17 -0
  53. package/src/duckdb/src/main/extension/extension_helper.cpp +2 -1
  54. package/src/duckdb/src/main/prepared_statement.cpp +38 -11
  55. package/src/duckdb/src/main/prepared_statement_data.cpp +23 -18
  56. package/src/duckdb/src/parser/expression/parameter_expression.cpp +7 -7
  57. package/src/duckdb/src/parser/statement/execute_statement.cpp +2 -2
  58. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +13 -4
  59. package/src/duckdb/src/parser/transform/expression/transform_param_ref.cpp +45 -26
  60. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +28 -6
  61. package/src/duckdb/src/parser/transformer.cpp +27 -9
  62. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +10 -10
  63. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +13 -7
  64. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +13 -13
  65. package/src/duckdb/src/planner/planner.cpp +7 -6
  66. package/src/duckdb/src/storage/checkpoint_manager.cpp +1 -1
  67. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +3 -3
  68. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +2 -2
  69. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -11
  70. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +1 -0
  71. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +12855 -12282
  72. package/src/duckdb/ub_src_common.cpp +2 -0
  73. package/src/duckdb/ub_src_function_table_arrow.cpp +2 -0
@@ -37,6 +37,14 @@ namespace duckdb {
37
37
  #define DUCKDB_EXPLICIT_FALLTHROUGH
38
38
  #endif
39
39
 
40
+ template <class... T>
41
+ struct AlwaysFalse {
42
+ static constexpr bool value = false;
43
+ };
44
+
45
+ template<typename T>
46
+ using reference = std::reference_wrapper<T>;
47
+
40
48
  template<class _Tp, bool SAFE = true>
41
49
  struct __unique_if
42
50
  {
@@ -193,9 +201,6 @@ void AssignSharedPointer(shared_ptr<T> &target, const shared_ptr<T> &source) {
193
201
  }
194
202
  }
195
203
 
196
- template<typename T>
197
- using reference = std::reference_wrapper<T>;
198
-
199
204
  template<typename T>
200
205
  using const_reference = std::reference_wrapper<const T>;
201
206
 
@@ -16,51 +16,84 @@
16
16
 
17
17
  namespace duckdb {
18
18
 
19
- struct CachedFile {
19
+ class CachedFileHandle;
20
+
21
+ //! Represents a file that is intended to be fully downloaded, then used in parallel by multiple threads
22
+ class CachedFile : public std::enable_shared_from_this<CachedFile> {
23
+ friend class CachedFileHandle;
24
+
25
+ public:
26
+ unique_ptr<CachedFileHandle> GetHandle() {
27
+ auto this_ptr = shared_from_this();
28
+ return make_uniq<CachedFileHandle>(this_ptr);
29
+ }
30
+
31
+ private:
20
32
  //! Cached Data
21
33
  shared_ptr<char> data;
22
34
  //! Data capacity
23
35
  uint64_t capacity = 0;
24
- //! If we finished downloading the file
25
- bool finished = false;
36
+ //! Lock for initializing the file
37
+ mutex lock;
38
+ //! When initialized is set to true, the file is safe for parallel reading without holding the lock
39
+ atomic<bool> initialized = {false};
40
+ };
41
+
42
+ //! Handle to a CachedFile
43
+ class CachedFileHandle {
44
+ public:
45
+ explicit CachedFileHandle(shared_ptr<CachedFile> &file_p);
46
+
47
+ //! Allocate a buffer for the file
48
+ void AllocateBuffer(idx_t size);
49
+ //! Indicate the file is fully downloaded and safe for parallel reading without lock
50
+ void SetInitialized();
51
+ //! Grow buffer to new size, copying over `bytes_to_copy` to the new buffer
52
+ void GrowBuffer(idx_t new_capacity, idx_t bytes_to_copy);
53
+ //! Write to the buffer
54
+ void Write(const char *buffer, idx_t length, idx_t offset = 0);
55
+
56
+ bool Initialized() {
57
+ return file->initialized;
58
+ }
59
+ const char *GetData() {
60
+ return file->data.get();
61
+ }
62
+ uint64_t GetCapacity() {
63
+ return file->capacity;
64
+ }
65
+
66
+ private:
67
+ unique_ptr<lock_guard<mutex>> lock;
68
+ shared_ptr<CachedFile> file;
26
69
  };
27
70
 
28
71
  class HTTPState {
29
72
  public:
73
+ //! Reset all counters and cached files
74
+ void Reset();
75
+ //! Get cache entry, create if not exists
76
+ shared_ptr<CachedFile> &GetCachedFile(const string &path);
77
+ //! Helper function to get the HTTP state
78
+ static shared_ptr<HTTPState> TryGetState(FileOpener *opener);
79
+
80
+ bool IsEmpty() {
81
+ return head_count == 0 && get_count == 0 && put_count == 0 && post_count == 0 && total_bytes_received == 0 &&
82
+ total_bytes_sent == 0;
83
+ }
84
+
30
85
  atomic<idx_t> head_count {0};
31
86
  atomic<idx_t> get_count {0};
32
87
  atomic<idx_t> put_count {0};
33
88
  atomic<idx_t> post_count {0};
34
89
  atomic<idx_t> total_bytes_received {0};
35
90
  atomic<idx_t> total_bytes_sent {0};
91
+
92
+ private:
36
93
  //! Mutex to lock when getting the cached file (Parallel Only)
37
94
  mutex cached_files_mutex;
38
95
  //! In case of fully downloading the file, the cached files of this query
39
- unordered_map<string, CachedFile> cached_files;
40
-
41
- void Reset() {
42
- head_count = 0;
43
- get_count = 0;
44
- put_count = 0;
45
- post_count = 0;
46
- total_bytes_received = 0;
47
- total_bytes_sent = 0;
48
- cached_files.clear();
49
- }
50
-
51
- //! helper function to get the HTTP
52
- static shared_ptr<HTTPState> TryGetState(FileOpener *opener) {
53
- auto client_context = FileOpener::TryGetClientContext(opener);
54
- if (client_context) {
55
- return client_context->client_data->http_state;
56
- }
57
- return nullptr;
58
- }
59
-
60
- bool IsEmpty() {
61
- return head_count == 0 && get_count == 0 && put_count == 0 && post_count == 0 && total_bytes_received == 0 &&
62
- total_bytes_sent == 0;
63
- }
96
+ unordered_map<string, shared_ptr<CachedFile>> cached_files;
64
97
  };
65
98
 
66
99
  } // namespace duckdb
@@ -177,7 +177,10 @@ public:
177
177
  template <class T>
178
178
  T GetValue() const;
179
179
  template <class T>
180
- static Value CreateValue(T value);
180
+ static Value CreateValue(T value) {
181
+ static_assert(AlwaysFalse<T>::value, "No specialization exists for this type");
182
+ return Value(nullptr);
183
+ }
181
184
  // Returns the internal value. Unlike GetValue(), this method does not perform casting, and assumes T matches the
182
185
  // type of the value. Only use this if you know what you are doing.
183
186
  template <class T>
@@ -104,11 +104,11 @@ struct ListPackFun {
104
104
 
105
105
  struct ListSliceFun {
106
106
  static constexpr const char *Name = "list_slice";
107
- static constexpr const char *Parameters = "list,begin,end";
108
- static constexpr const char *Description = "Extract a sublist using slice conventions. NULLs are interpreted as the bounds of the LIST. Negative values are accepted.";
109
- static constexpr const char *Example = "list_slice(l, 2, NULL)";
107
+ static constexpr const char *Parameters = "list,begin,end[,step]";
108
+ static constexpr const char *Description = "Extract a sublist using slice conventions. Negative values are accepted.";
109
+ static constexpr const char *Example = "list_slice(l, 2, 4)";
110
110
 
111
- static ScalarFunction GetFunction();
111
+ static ScalarFunctionSet GetFunctions();
112
112
  };
113
113
 
114
114
  struct ArraySliceFun {
@@ -35,14 +35,16 @@ public:
35
35
  //! Constructs an ART
36
36
  ART(const vector<column_t> &column_ids, TableIOManager &table_io_manager,
37
37
  const vector<unique_ptr<Expression>> &unbound_expressions, const IndexConstraintType constraint_type,
38
- AttachedDatabase &db, const idx_t block_id = DConstants::INVALID_INDEX,
39
- const idx_t block_offset = DConstants::INVALID_INDEX);
38
+ AttachedDatabase &db, const shared_ptr<vector<FixedSizeAllocator>> &allocators_ptr = nullptr,
39
+ const idx_t block_id = DConstants::INVALID_INDEX, const idx_t block_offset = DConstants::INVALID_INDEX);
40
40
  ~ART() override;
41
41
 
42
42
  //! Root of the tree
43
43
  unique_ptr<Node> tree;
44
44
  //! Fixed-size allocators holding the ART nodes
45
- vector<unique_ptr<FixedSizeAllocator>> allocators;
45
+ shared_ptr<vector<FixedSizeAllocator>> allocators;
46
+ //! True, if the ART owns its data
47
+ bool owns_data;
46
48
 
47
49
  public:
48
50
  //! Initialize a single predicate scan on the index with the given expression and column IDs
@@ -102,12 +104,12 @@ public:
102
104
 
103
105
  //! Find the node with a matching key, or return nullptr if not found
104
106
  Node Lookup(Node node, const ARTKey &key, idx_t depth);
107
+ //! Insert a key into the tree
108
+ bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
105
109
 
106
110
  private:
107
111
  //! Insert a row ID into a leaf
108
112
  bool InsertToLeaf(Node &leaf, const row_t &row_id);
109
- //! Insert a key into the tree
110
- bool Insert(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
111
113
  //! Erase a key from the tree (if a leaf has more than one value) or erase the leaf itself
112
114
  void Erase(Node &node, const ARTKey &key, idx_t depth, const row_t &row_id);
113
115
 
@@ -49,9 +49,9 @@ public:
49
49
  return *Node::GetAllocator(art, NType::LEAF).Get<Leaf>(ptr);
50
50
  }
51
51
 
52
- //! Initializes a merge by incrementing the buffer IDs of the leaf
52
+ //! Initializes a merge by incrementing the buffer IDs of the leaf (chain)
53
53
  static void InitializeMerge(ART &art, Node &node, const ARTFlags &flags);
54
- //! Merge leaves and free all copied leaf nodes
54
+ //! Merge leaves (chains) and free all copied leaf nodes
55
55
  static void Merge(ART &art, Node &l_node, Node &r_node);
56
56
 
57
57
  //! Insert a row ID into a leaf
@@ -66,15 +66,15 @@ public:
66
66
  //! Returns whether the leaf contains the row ID
67
67
  static bool ContainsRowId(ART &art, Node &node, const row_t row_id);
68
68
 
69
- //! Returns the string representation of the leaf, or only traverses and verifies the leaf
69
+ //! Returns the string representation of the leaf (chain), or only traverses and verifies the leaf (chain)
70
70
  static string VerifyAndToString(ART &art, Node &node);
71
71
 
72
- //! Serialize the leaf
72
+ //! Serialize the leaf (chain)
73
73
  static BlockPointer Serialize(ART &art, Node &node, MetaBlockWriter &writer);
74
- //! Deserialize the leaf
74
+ //! Deserialize the leaf (chain)
75
75
  static void Deserialize(ART &art, Node &node, MetaBlockReader &reader);
76
76
 
77
- //! Vacuum the leaf
77
+ //! Vacuum the leaf (chain)
78
78
  static void Vacuum(ART &art, Node &node);
79
79
 
80
80
  private:
@@ -180,6 +180,12 @@ public:
180
180
  data = 0;
181
181
  }
182
182
 
183
+ //! Adds an idx_t to a buffer ID, the rightmost 32 bits contain the buffer ID
184
+ inline void AddToBufferID(const idx_t summand) {
185
+ D_ASSERT(summand < NumericLimits<uint32_t>().Maximum());
186
+ data += summand;
187
+ }
188
+
183
189
  //! Comparison operator
184
190
  inline bool operator==(const Node &node) const {
185
191
  return data == node.data;
@@ -42,10 +42,8 @@ public:
42
42
  return *Node::GetAllocator(art, NType::PREFIX).Get<Prefix>(ptr);
43
43
  }
44
44
 
45
- //! Initializes a merge by incrementing the buffer ID of the child node(s)
46
- inline void InitializeMerge(ART &art, const ARTFlags &flags) {
47
- ptr.InitializeMerge(art, flags);
48
- }
45
+ //! Initializes a merge by incrementing the buffer ID of the prefix and its child node(s)
46
+ static void InitializeMerge(ART &art, Node &node, const ARTFlags &flags);
49
47
 
50
48
  //! Appends a byte and a child_prefix to prefix. If there is no prefix, then it pushes the
51
49
  //! byte on top of child_prefix. If there is no child_prefix, then it creates a new
@@ -75,15 +73,13 @@ public:
75
73
  //! Returns the string representation of the node, or only traverses and verifies the node and its subtree
76
74
  static string VerifyAndToString(ART &art, Node &node, const bool only_verify);
77
75
 
78
- //! Serialize this node
79
- BlockPointer Serialize(ART &art, MetaBlockWriter &writer);
80
- //! Deserialize this node
81
- void Deserialize(MetaBlockReader &reader);
76
+ //! Serialize this node and all subsequent nodes
77
+ static BlockPointer Serialize(ART &art, Node &node, MetaBlockWriter &writer);
78
+ //! Deserialize this node and all subsequent prefix nodes
79
+ static void Deserialize(ART &art, Node &node, MetaBlockReader &reader);
82
80
 
83
81
  //! Vacuum the child of the node
84
- inline void Vacuum(ART &art, const ARTFlags &flags) {
85
- ptr.Vacuum(art, flags);
86
- }
82
+ static void Vacuum(ART &art, Node &node, const ARTFlags &flags);
87
83
 
88
84
  private:
89
85
  //! Appends the byte to this prefix node, or creates a subsequent prefix node,
@@ -92,6 +88,8 @@ private:
92
88
  //! Appends the other_prefix and all its subsequent prefix nodes to this prefix node.
93
89
  //! Also frees all copied/appended nodes
94
90
  void Append(ART &art, Node other_prefix);
91
+ //! Get the total count of bytes in the chain of prefixes, with the node reference pointing to the first non-prefix node
92
+ static idx_t TotalCount(ART &art, reference<Node> &node);
95
93
  };
96
94
 
97
95
  } // namespace duckdb
@@ -27,7 +27,7 @@ public:
27
27
  public:
28
28
  PhysicalCreateIndex(LogicalOperator &op, TableCatalogEntry &table, const vector<column_t> &column_ids,
29
29
  unique_ptr<CreateIndexInfo> info, vector<unique_ptr<Expression>> unbound_expressions,
30
- idx_t estimated_cardinality);
30
+ idx_t estimated_cardinality, const bool sorted);
31
31
 
32
32
  //! The table to create the index for
33
33
  DuckTableEntry &table;
@@ -37,6 +37,8 @@ public:
37
37
  unique_ptr<CreateIndexInfo> info;
38
38
  //! Unbound expressions to be used in the optimizer
39
39
  vector<unique_ptr<Expression>> unbound_expressions;
40
+ //! Whether the pipeline sorts the data prior to index creation
41
+ const bool sorted;
40
42
 
41
43
  public:
42
44
  //! Source interface, NOP for this operator
@@ -52,6 +54,11 @@ public:
52
54
  //! Sink interface, global sink state
53
55
  unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
54
56
 
57
+ //! Sink for unsorted data: insert iteratively
58
+ SinkResultType SinkUnsorted(Vector &row_identifiers, OperatorSinkInput &input) const;
59
+ //! Sink for sorted data: build + merge
60
+ SinkResultType SinkSorted(Vector &row_identifiers, OperatorSinkInput &input) const;
61
+
55
62
  SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override;
56
63
  SinkCombineResultType Combine(ExecutionContext &context, OperatorSinkCombineInput &input) const override;
57
64
  SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
@@ -0,0 +1,99 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/function/table/arrow/arrow_duck_schema.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/common/types.hpp"
12
+ #include "duckdb/common/unordered_map.hpp"
13
+ #include "duckdb/common/vector.hpp"
14
+ #include "duckdb/common/unique_ptr.hpp"
15
+
16
+ namespace duckdb {
17
+ //===--------------------------------------------------------------------===//
18
+ // Arrow Variable Size Types
19
+ //===--------------------------------------------------------------------===//
20
+ enum class ArrowVariableSizeType : uint8_t { FIXED_SIZE = 0, NORMAL = 1, SUPER_SIZE = 2 };
21
+
22
+ //===--------------------------------------------------------------------===//
23
+ // Arrow Time/Date Types
24
+ //===--------------------------------------------------------------------===//
25
+ enum class ArrowDateTimeType : uint8_t {
26
+ MILLISECONDS = 0,
27
+ MICROSECONDS = 1,
28
+ NANOSECONDS = 2,
29
+ SECONDS = 3,
30
+ DAYS = 4,
31
+ MONTHS = 5,
32
+ MONTH_DAY_NANO = 6
33
+ };
34
+
35
+ class ArrowType {
36
+ public:
37
+ //! From a DuckDB type
38
+ ArrowType(LogicalType type_p)
39
+ : type(std::move(type_p)), size_type(ArrowVariableSizeType::NORMAL),
40
+ date_time_precision(ArrowDateTimeType::DAYS) {};
41
+
42
+ //! From a DuckDB type + fixed_size
43
+ ArrowType(LogicalType type_p, idx_t fixed_size_p)
44
+ : type(std::move(type_p)), size_type(ArrowVariableSizeType::FIXED_SIZE),
45
+ date_time_precision(ArrowDateTimeType::DAYS), fixed_size(fixed_size_p) {};
46
+
47
+ //! From a DuckDB type + variable size type
48
+ ArrowType(LogicalType type_p, ArrowVariableSizeType size_type_p)
49
+ : type(std::move(type_p)), size_type(size_type_p), date_time_precision(ArrowDateTimeType::DAYS) {};
50
+
51
+ //! From a DuckDB type + datetime type
52
+ ArrowType(LogicalType type_p, ArrowDateTimeType date_time_precision_p)
53
+ : type(std::move(type_p)), size_type(ArrowVariableSizeType::NORMAL),
54
+ date_time_precision(date_time_precision_p) {};
55
+
56
+ void AddChild(unique_ptr<ArrowType> child);
57
+
58
+ void AssignChildren(vector<unique_ptr<ArrowType>> children);
59
+
60
+ const LogicalType &GetDuckType() const;
61
+
62
+ ArrowVariableSizeType GetSizeType() const;
63
+
64
+ idx_t FixedSize() const;
65
+
66
+ void SetDictionary(unique_ptr<ArrowType> dictionary);
67
+
68
+ ArrowDateTimeType GetDateTimeType() const;
69
+
70
+ const ArrowType &GetDictionary() const;
71
+
72
+ const ArrowType &operator[](idx_t index) const;
73
+
74
+ private:
75
+ LogicalType type;
76
+ //! If this is a nested type, the types of its children.
77
+ vector<unique_ptr<ArrowType>> children;
78
+ //! If it is a variable size type (e.g., strings, blobs, lists), holds which type it is
79
+ ArrowVariableSizeType size_type;
80
+ //! If this is a date/time type, holds its precision
81
+ ArrowDateTimeType date_time_precision;
82
+ //! Only for size types with fixed size
83
+ idx_t fixed_size = 0;
84
+ //! Hold the optional type if the array is a dictionary
85
+ unique_ptr<ArrowType> dictionary_type;
86
+ };
87
+
88
+ using arrow_column_map_t = unordered_map<idx_t, unique_ptr<ArrowType>>;
89
+
90
+ struct ArrowTableType {
91
+ public:
92
+ void AddColumn(idx_t index, unique_ptr<ArrowType> type);
93
+ const arrow_column_map_t &GetColumns() const;
94
+
95
+ private:
96
+ arrow_column_map_t arrow_convert_data;
97
+ };
98
+
99
+ } // namespace duckdb
@@ -16,25 +16,9 @@
16
16
  #include "duckdb/common/thread.hpp"
17
17
  #include "duckdb/common/unordered_map.hpp"
18
18
  #include "duckdb/function/built_in_functions.hpp"
19
+ #include "duckdb/function/table/arrow/arrow_duck_schema.hpp"
19
20
 
20
21
  namespace duckdb {
21
- //===--------------------------------------------------------------------===//
22
- // Arrow Variable Size Types
23
- //===--------------------------------------------------------------------===//
24
- enum class ArrowVariableSizeType : uint8_t { FIXED_SIZE = 0, NORMAL = 1, SUPER_SIZE = 2 };
25
-
26
- //===--------------------------------------------------------------------===//
27
- // Arrow Time/Date Types
28
- //===--------------------------------------------------------------------===//
29
- enum class ArrowDateTimeType : uint8_t {
30
- MILLISECONDS = 0,
31
- MICROSECONDS = 1,
32
- NANOSECONDS = 2,
33
- SECONDS = 3,
34
- DAYS = 4,
35
- MONTHS = 5,
36
- MONTH_DAY_NANO = 6
37
- };
38
22
 
39
23
  struct ArrowInterval {
40
24
  int32_t months;
@@ -46,18 +30,6 @@ struct ArrowInterval {
46
30
  }
47
31
  };
48
32
 
49
- struct ArrowConvertData {
50
- ArrowConvertData(LogicalType type) : dictionary_type(type) {};
51
- ArrowConvertData() {};
52
-
53
- //! Hold type of dictionary
54
- LogicalType dictionary_type;
55
- //! If its a variable size type (e.g., strings, blobs, lists) holds which type it is
56
- vector<pair<ArrowVariableSizeType, idx_t>> variable_sz_type;
57
- //! If this is a date/time holds its precision
58
- vector<ArrowDateTimeType> date_time_precision;
59
- };
60
-
61
33
  struct ArrowProjectedColumns {
62
34
  unordered_map<idx_t, string> projection_map;
63
35
  vector<string> columns;
@@ -73,11 +45,10 @@ typedef unique_ptr<ArrowArrayStreamWrapper> (*stream_factory_produce_t)(uintptr_
73
45
  typedef void (*stream_factory_get_schema_t)(uintptr_t stream_factory_ptr, ArrowSchemaWrapper &schema);
74
46
 
75
47
  struct ArrowScanFunctionData : public PyTableFunctionData {
48
+ public:
76
49
  ArrowScanFunctionData(stream_factory_produce_t scanner_producer_p, uintptr_t stream_factory_ptr_p)
77
50
  : lines_read(0), stream_factory_ptr(stream_factory_ptr_p), scanner_producer(scanner_producer_p) {
78
51
  }
79
- //! This holds the original list type (col_idx, [ArrowListType,size])
80
- unordered_map<idx_t, unique_ptr<ArrowConvertData>> arrow_convert_data;
81
52
  vector<LogicalType> all_types;
82
53
  atomic<idx_t> lines_read;
83
54
  ArrowSchemaWrapper schema_root;
@@ -86,6 +57,8 @@ struct ArrowScanFunctionData : public PyTableFunctionData {
86
57
  uintptr_t stream_factory_ptr;
87
58
  //! Pointer to the scanner factory produce
88
59
  stream_factory_produce_t scanner_producer;
60
+ //! Arrow table data
61
+ ArrowTableType arrow_table;
89
62
  };
90
63
 
91
64
  struct ArrowScanLocalState : public LocalTableFunctionState {
@@ -132,8 +105,7 @@ public:
132
105
  static unique_ptr<FunctionData> ArrowScanBind(ClientContext &context, TableFunctionBindInput &input,
133
106
  vector<LogicalType> &return_types, vector<string> &names);
134
107
  //! Actual conversion from Arrow to DuckDB
135
- static void ArrowToDuckDB(ArrowScanLocalState &scan_state,
136
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
108
+ static void ArrowToDuckDB(ArrowScanLocalState &scan_state, const arrow_column_map_t &arrow_convert_data,
137
109
  DataChunk &output, idx_t start, bool arrow_scan_is_projected = true);
138
110
 
139
111
  //! Get next scan state
@@ -172,9 +144,7 @@ protected:
172
144
  //! Renames repeated columns and case sensitive columns
173
145
  static void RenameArrowColumns(vector<string> &names);
174
146
  //! Helper function to get the DuckDB logical type
175
- static LogicalType GetArrowLogicalType(ArrowSchema &schema,
176
- std::unordered_map<idx_t, unique_ptr<ArrowConvertData>> &arrow_convert_data,
177
- idx_t col_idx);
147
+ static unique_ptr<ArrowType> GetArrowLogicalType(ArrowSchema &schema);
178
148
  };
179
149
 
180
150
  } // namespace duckdb
@@ -13,6 +13,7 @@
13
13
  #include "duckdb/common/types.hpp"
14
14
  #include "duckdb/common/types/data_chunk.hpp"
15
15
  #include "duckdb/main/appender.hpp"
16
+ #include "duckdb/common/case_insensitive_map.hpp"
16
17
 
17
18
  #include <cstring>
18
19
  #include <cassert>
@@ -30,8 +31,9 @@ struct DatabaseData {
30
31
  };
31
32
 
32
33
  struct PreparedStatementWrapper {
34
+ //! Map of name -> values
35
+ case_insensitive_map_t<Value> values;
33
36
  unique_ptr<PreparedStatement> statement;
34
- vector<Value> values;
35
37
  };
36
38
 
37
39
  struct ExtractStatementsWrapper {
@@ -47,7 +47,7 @@ struct ClientData;
47
47
 
48
48
  struct PendingQueryParameters {
49
49
  //! Prepared statement parameters (if any)
50
- vector<Value> *parameters = nullptr;
50
+ optional_ptr<case_insensitive_map_t<Value>> parameters;
51
51
  //! Whether or not a stream result should be allowed
52
52
  bool allow_stream_result = false;
53
53
  };
@@ -135,16 +135,17 @@ public:
135
135
  //! Create a pending query result from a prepared statement with the given name and set of parameters
136
136
  //! It is possible that the prepared statement will be re-bound. This will generally happen if the catalog is
137
137
  //! modified in between the prepared statement being bound and the prepared statement being run.
138
- DUCKDB_API unique_ptr<PendingQueryResult>
139
- PendingQuery(const string &query, shared_ptr<PreparedStatementData> &prepared, PendingQueryParameters parameters);
138
+ DUCKDB_API unique_ptr<PendingQueryResult> PendingQuery(const string &query,
139
+ shared_ptr<PreparedStatementData> &prepared,
140
+ const PendingQueryParameters &parameters);
140
141
 
141
142
  //! Execute a prepared statement with the given name and set of parameters
142
143
  //! It is possible that the prepared statement will be re-bound. This will generally happen if the catalog is
143
144
  //! modified in between the prepared statement being bound and the prepared statement being run.
144
145
  DUCKDB_API unique_ptr<QueryResult> Execute(const string &query, shared_ptr<PreparedStatementData> &prepared,
145
- vector<Value> &values, bool allow_stream_result = true);
146
+ case_insensitive_map_t<Value> &values, bool allow_stream_result = true);
146
147
  DUCKDB_API unique_ptr<QueryResult> Execute(const string &query, shared_ptr<PreparedStatementData> &prepared,
147
- PendingQueryParameters parameters);
148
+ const PendingQueryParameters &parameters);
148
149
 
149
150
  //! Gets current percentage of the query's progress, returns 0 in case the progress bar is disabled.
150
151
  DUCKDB_API double GetProgress();
@@ -198,7 +199,7 @@ private:
198
199
  PreservedError &error);
199
200
  //! Issues a query to the database and returns a Pending Query Result
200
201
  unique_ptr<PendingQueryResult> PendingQueryInternal(ClientContextLock &lock, unique_ptr<SQLStatement> statement,
201
- PendingQueryParameters parameters, bool verify = true);
202
+ const PendingQueryParameters &parameters, bool verify = true);
202
203
  unique_ptr<QueryResult> ExecutePendingQueryInternal(ClientContextLock &lock, PendingQueryResult &query);
203
204
 
204
205
  //! Parse statements from a query
@@ -214,18 +215,18 @@ private:
214
215
  unique_ptr<PendingQueryResult> PendingStatementOrPreparedStatement(ClientContextLock &lock, const string &query,
215
216
  unique_ptr<SQLStatement> statement,
216
217
  shared_ptr<PreparedStatementData> &prepared,
217
- PendingQueryParameters parameters);
218
+ const PendingQueryParameters &parameters);
218
219
  unique_ptr<PendingQueryResult> PendingPreparedStatement(ClientContextLock &lock,
219
220
  shared_ptr<PreparedStatementData> statement_p,
220
- PendingQueryParameters parameters);
221
+ const PendingQueryParameters &parameters);
221
222
 
222
223
  //! Internally prepare a SQL statement. Caller must hold the context_lock.
223
- shared_ptr<PreparedStatementData> CreatePreparedStatement(ClientContextLock &lock, const string &query,
224
- unique_ptr<SQLStatement> statement,
225
- vector<Value> *values = nullptr);
224
+ shared_ptr<PreparedStatementData>
225
+ CreatePreparedStatement(ClientContextLock &lock, const string &query, unique_ptr<SQLStatement> statement,
226
+ optional_ptr<case_insensitive_map_t<Value>> values = nullptr);
226
227
  unique_ptr<PendingQueryResult> PendingStatementInternal(ClientContextLock &lock, const string &query,
227
228
  unique_ptr<SQLStatement> statement,
228
- PendingQueryParameters parameters);
229
+ const PendingQueryParameters &parameters);
229
230
  unique_ptr<QueryResult> RunStatementInternal(ClientContextLock &lock, const string &query,
230
231
  unique_ptr<SQLStatement> statement, bool allow_stream_result,
231
232
  bool verify = true);
@@ -245,11 +246,11 @@ private:
245
246
 
246
247
  unique_ptr<PendingQueryResult> PendingStatementOrPreparedStatementInternal(
247
248
  ClientContextLock &lock, const string &query, unique_ptr<SQLStatement> statement,
248
- shared_ptr<PreparedStatementData> &prepared, PendingQueryParameters parameters);
249
+ shared_ptr<PreparedStatementData> &prepared, const PendingQueryParameters &parameters);
249
250
 
250
251
  unique_ptr<PendingQueryResult> PendingQueryPreparedInternal(ClientContextLock &lock, const string &query,
251
252
  shared_ptr<PreparedStatementData> &prepared,
252
- PendingQueryParameters parameters);
253
+ const PendingQueryParameters &parameters);
253
254
 
254
255
  unique_ptr<PendingQueryResult> PendingQueryInternal(ClientContextLock &, const shared_ptr<Relation> &relation,
255
256
  bool allow_stream_result);