duckdb 0.8.2-dev2399.0 → 0.8.2-dev2509.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,51 +16,84 @@
16
16
 
17
17
  namespace duckdb {
18
18
 
19
- struct CachedFile {
19
+ class CachedFileHandle;
20
+
21
+ //! Represents a file that is intended to be fully downloaded, then used in parallel by multiple threads
22
+ class CachedFile : public std::enable_shared_from_this<CachedFile> {
23
+ friend class CachedFileHandle;
24
+
25
+ public:
26
+ unique_ptr<CachedFileHandle> GetHandle() {
27
+ auto this_ptr = shared_from_this();
28
+ return make_uniq<CachedFileHandle>(this_ptr);
29
+ }
30
+
31
+ private:
20
32
  //! Cached Data
21
33
  shared_ptr<char> data;
22
34
  //! Data capacity
23
35
  uint64_t capacity = 0;
24
- //! If we finished downloading the file
25
- bool finished = false;
36
+ //! Lock for initializing the file
37
+ mutex lock;
38
+ //! When initialized is set to true, the file is safe for parallel reading without holding the lock
39
+ atomic<bool> initialized = {false};
40
+ };
41
+
42
+ //! Handle to a CachedFile
43
+ class CachedFileHandle {
44
+ public:
45
+ explicit CachedFileHandle(shared_ptr<CachedFile> &file_p);
46
+
47
+ //! Allocate a buffer for the file
48
+ void AllocateBuffer(idx_t size);
49
+ //! Indicate the file is fully downloaded and safe for parallel reading without lock
50
+ void SetInitialized();
51
+ //! Grow buffer to new size, copying over `bytes_to_copy` to the new buffer
52
+ void GrowBuffer(idx_t new_capacity, idx_t bytes_to_copy);
53
+ //! Write to the buffer
54
+ void Write(const char *buffer, idx_t length, idx_t offset = 0);
55
+
56
+ bool Initialized() {
57
+ return file->initialized;
58
+ }
59
+ const char *GetData() {
60
+ return file->data.get();
61
+ }
62
+ uint64_t GetCapacity() {
63
+ return file->capacity;
64
+ }
65
+
66
+ private:
67
+ unique_ptr<lock_guard<mutex>> lock;
68
+ shared_ptr<CachedFile> file;
26
69
  };
27
70
 
28
71
  class HTTPState {
29
72
  public:
73
+ //! Reset all counters and cached files
74
+ void Reset();
75
+ //! Get cache entry, create if not exists
76
+ shared_ptr<CachedFile> &GetCachedFile(const string &path);
77
+ //! Helper function to get the HTTP state
78
+ static shared_ptr<HTTPState> TryGetState(FileOpener *opener);
79
+
80
+ bool IsEmpty() {
81
+ return head_count == 0 && get_count == 0 && put_count == 0 && post_count == 0 && total_bytes_received == 0 &&
82
+ total_bytes_sent == 0;
83
+ }
84
+
30
85
  atomic<idx_t> head_count {0};
31
86
  atomic<idx_t> get_count {0};
32
87
  atomic<idx_t> put_count {0};
33
88
  atomic<idx_t> post_count {0};
34
89
  atomic<idx_t> total_bytes_received {0};
35
90
  atomic<idx_t> total_bytes_sent {0};
91
+
92
+ private:
36
93
  //! Mutex to lock when getting the cached file (Parallel Only)
37
94
  mutex cached_files_mutex;
38
95
  //! The cached files of this query, used when a file is fully downloaded
39
- unordered_map<string, CachedFile> cached_files;
40
-
41
- void Reset() {
42
- head_count = 0;
43
- get_count = 0;
44
- put_count = 0;
45
- post_count = 0;
46
- total_bytes_received = 0;
47
- total_bytes_sent = 0;
48
- cached_files.clear();
49
- }
50
-
51
- //! helper function to get the HTTP
52
- static shared_ptr<HTTPState> TryGetState(FileOpener *opener) {
53
- auto client_context = FileOpener::TryGetClientContext(opener);
54
- if (client_context) {
55
- return client_context->client_data->http_state;
56
- }
57
- return nullptr;
58
- }
59
-
60
- bool IsEmpty() {
61
- return head_count == 0 && get_count == 0 && put_count == 0 && post_count == 0 && total_bytes_received == 0 &&
62
- total_bytes_sent == 0;
63
- }
96
+ unordered_map<string, shared_ptr<CachedFile>> cached_files;
64
97
  };
65
98
 
66
99
  } // namespace duckdb
@@ -104,11 +104,11 @@ struct ListPackFun {
104
104
 
105
105
  struct ListSliceFun {
106
106
  static constexpr const char *Name = "list_slice";
107
- static constexpr const char *Parameters = "list,begin,end";
108
- static constexpr const char *Description = "Extract a sublist using slice conventions. NULLs are interpreted as the bounds of the LIST. Negative values are accepted.";
109
- static constexpr const char *Example = "list_slice(l, 2, NULL)";
107
+ static constexpr const char *Parameters = "list,begin,end[,step]";
108
+ static constexpr const char *Description = "Extract a sublist using slice conventions. Negative values are accepted.";
109
+ static constexpr const char *Example = "list_slice(l, 2, 4)";
110
110
 
111
- static ScalarFunction GetFunction();
111
+ static ScalarFunctionSet GetFunctions();
112
112
  };
113
113
 
114
114
  struct ArraySliceFun {
@@ -12,6 +12,7 @@
12
12
  #include "duckdb/common/vector.hpp"
13
13
  #include "duckdb/common/string_util.hpp"
14
14
  #include "duckdb/parser/qualified_name.hpp"
15
+ #include "duckdb/parser/expression/constant_expression.hpp"
15
16
 
16
17
  namespace duckdb {
17
18
  //! Represents a built-in operator expression
@@ -86,9 +87,25 @@ public:
86
87
  return "(" + entry.children[0]->ToString() + " IS NOT NULL)";
87
88
  case ExpressionType::ARRAY_EXTRACT:
88
89
  return entry.children[0]->ToString() + "[" + entry.children[1]->ToString() + "]";
89
- case ExpressionType::ARRAY_SLICE:
90
- return entry.children[0]->ToString() + "[" + entry.children[1]->ToString() + ":" +
91
- entry.children[2]->ToString() + "]";
90
+ case ExpressionType::ARRAY_SLICE: {
91
+ string begin = entry.children[1]->ToString();
92
+ if (begin == "[]") {
93
+ begin = "";
94
+ }
95
+ string end = entry.children[2]->ToString();
96
+ if (end == "[]") {
97
+ if (entry.children.size() == 4) {
98
+ end = "-";
99
+ } else {
100
+ end = "";
101
+ }
102
+ }
103
+ if (entry.children.size() == 4) {
104
+ return entry.children[0]->ToString() + "[" + begin + ":" + end + ":" + entry.children[3]->ToString() +
105
+ "]";
106
+ }
107
+ return entry.children[0]->ToString() + "[" + begin + ":" + end + "]";
108
+ }
92
109
  case ExpressionType::STRUCT_EXTRACT: {
93
110
  if (entry.children[1]->type != ExpressionType::VALUE_CONSTANT) {
94
111
  return string();
@@ -180,7 +180,8 @@ ExtensionLoadResult ExtensionHelper::LoadExtensionInternal(DuckDB &db, const std
180
180
  #endif
181
181
 
182
182
  #ifdef DUCKDB_EXTENSIONS_TEST_WITH_LOADABLE
183
- if (!initial_load && StringUtil::Contains(DUCKDB_EXTENSIONS_TEST_WITH_LOADABLE, extension)) {
183
+ // Note: the surrounding commas are intentional; they allow a simple substring match against the list of extension names
184
+ if (!initial_load && StringUtil::Contains(DUCKDB_EXTENSIONS_TEST_WITH_LOADABLE, "," + extension + ",")) {
184
185
  Connection con(db);
185
186
  auto result = con.Query((string) "LOAD '" + DUCKDB_EXTENSIONS_BUILD_PATH + "/" + extension + "/" + extension +
186
187
  ".duckdb_extension'");
@@ -27,10 +27,19 @@ unique_ptr<ParsedExpression> Transformer::TransformArrayAccess(duckdb_libpgquery
27
27
  children.push_back(std::move(result));
28
28
  if (index->is_slice) {
29
29
  // slice
30
- children.push_back(!index->lidx ? make_uniq<ConstantExpression>(Value())
31
- : TransformExpression(index->lidx));
32
- children.push_back(!index->uidx ? make_uniq<ConstantExpression>(Value())
33
- : TransformExpression(index->uidx));
30
+ // if either the lower or upper bound is not specified, we use an empty const list so that we can
31
+ // handle it in the execution
32
+ unique_ptr<ParsedExpression> lower =
33
+ index->lidx ? TransformExpression(index->lidx)
34
+ : make_uniq<ConstantExpression>(Value::LIST(LogicalType::INTEGER, vector<Value>()));
35
+ children.push_back(std::move(lower));
36
+ unique_ptr<ParsedExpression> upper =
37
+ index->uidx ? TransformExpression(index->uidx)
38
+ : make_uniq<ConstantExpression>(Value::LIST(LogicalType::INTEGER, vector<Value>()));
39
+ children.push_back(std::move(upper));
40
+ if (index->step) {
41
+ children.push_back(TransformExpression(index->step));
42
+ }
34
43
  result = make_uniq<OperatorExpression>(ExpressionType::ARRAY_SLICE, std::move(children));
35
44
  } else {
36
45
  // array access
@@ -325,6 +325,7 @@ typedef struct PGAIndices {
325
325
  bool is_slice; /* true if slice (i.e., colon present) */
326
326
  PGNode *lidx; /* slice lower bound, if any */
327
327
  PGNode *uidx; /* subscript, or slice upper bound if any */
328
+ PGNode *step; /* slice step, if any */
328
329
  } PGAIndices;
329
330
 
330
331
  /*