duckdb 0.3.5-dev992.0 → 0.4.1-dev1019.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -91,8 +91,8 @@ public:
91
91
  * under the License.
92
92
  */
93
93
 
94
- #ifndef _THRIFT_THRIFT_H_
95
- #define _THRIFT_THRIFT_H_ 1
94
+ #ifndef _DUCKDB_THRIFT_THRIFT_H_
95
+ #define _DUCKDB_THRIFT_THRIFT_H_ 1
96
96
 
97
97
 
98
98
 
@@ -121,8 +121,8 @@ public:
121
121
 
122
122
  // clang-format off
123
123
 
124
- #ifndef _THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
125
- # define _THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
124
+ #ifndef _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
125
+ # define _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
126
126
 
127
127
  #ifdef _WIN32
128
128
  #ifdef _WINSOCKAPI_
@@ -236,7 +236,7 @@ public:
236
236
  # define THRIFT_SHUT_RDWR SHUT_RDWR
237
237
  #endif
238
238
 
239
- #endif // _THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
239
+ #endif // _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
240
240
 
241
241
 
242
242
  // LICENSE_CHANGE_END
@@ -329,8 +329,8 @@ public:
329
329
  * under the License.
330
330
  */
331
331
 
332
- #ifndef _THRIFT_TLOGGING_H_
333
- #define _THRIFT_TLOGGING_H_ 1
332
+ #ifndef _DUCKDB_THRIFT_TLOGGING_H_
333
+ #define _DUCKDB_THRIFT_TLOGGING_H_ 1
334
334
 
335
335
 
336
336
 
@@ -454,7 +454,7 @@ public:
454
454
  #define T_GENERIC_PROTOCOL(template_class, generic_prot, specific_prot)
455
455
  #endif
456
456
 
457
- #endif // #ifndef _THRIFT_TLOGGING_H_
457
+ #endif // #ifndef _DUCKDB_THRIFT_TLOGGING_H_
458
458
 
459
459
 
460
460
  // LICENSE_CHANGE_END
@@ -546,7 +546,7 @@ void profile_write_pprof(FILE* gen_calls_f, FILE* virtual_calls_f);
546
546
  }
547
547
  } // duckdb_apache::thrift
548
548
 
549
- #endif // #ifndef _THRIFT_THRIFT_H_
549
+ #endif // #ifndef _DUCKDB_THRIFT_THRIFT_H_
550
550
 
551
551
 
552
552
  // LICENSE_CHANGE_END
@@ -576,8 +576,8 @@ void profile_write_pprof(FILE* gen_calls_f, FILE* virtual_calls_f);
576
576
  * under the License.
577
577
  */
578
578
 
579
- #ifndef _THRIFT_TAPPLICATIONEXCEPTION_H_
580
- #define _THRIFT_TAPPLICATIONEXCEPTION_H_ 1
579
+ #ifndef _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_
580
+ #define _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_ 1
581
581
 
582
582
 
583
583
 
@@ -671,7 +671,7 @@ protected:
671
671
  }
672
672
  } // duckdb_apache::thrift
673
673
 
674
- #endif // #ifndef _THRIFT_TAPPLICATIONEXCEPTION_H_
674
+ #endif // #ifndef _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_
675
675
 
676
676
 
677
677
  // LICENSE_CHANGE_END
@@ -701,8 +701,8 @@ protected:
701
701
  * under the License.
702
702
  */
703
703
 
704
- #ifndef _THRIFT_TBASE_H_
705
- #define _THRIFT_TBASE_H_ 1
704
+ #ifndef _DUCKDB_THRIFT_TBASE_H_
705
+ #define _DUCKDB_THRIFT_TBASE_H_ 1
706
706
 
707
707
 
708
708
 
@@ -730,8 +730,8 @@ protected:
730
730
  * under the License.
731
731
  */
732
732
 
733
- #ifndef _THRIFT_PROTOCOL_TPROTOCOL_H_
734
- #define _THRIFT_PROTOCOL_TPROTOCOL_H_ 1
733
+ #ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_
734
+ #define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ 1
735
735
 
736
736
  #ifdef _WIN32
737
737
  // Need to come before any Windows.h includes
@@ -763,8 +763,8 @@ protected:
763
763
  * under the License.
764
764
  */
765
765
 
766
- #ifndef _THRIFT_TRANSPORT_TTRANSPORT_H_
767
- #define _THRIFT_TRANSPORT_TTRANSPORT_H_ 1
766
+ #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_
767
+ #define _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_ 1
768
768
 
769
769
 
770
770
 
@@ -792,8 +792,8 @@ protected:
792
792
  * under the License.
793
793
  */
794
794
 
795
- #ifndef _THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_
796
- #define _THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ 1
795
+ #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_
796
+ #define _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ 1
797
797
 
798
798
  // FUCK OFF #include <boost/numeric/conversion/cast.hpp>
799
799
  #include <string>
@@ -878,7 +878,7 @@ protected:
878
878
  }
879
879
  } // duckdb_apache::thrift::transport
880
880
 
881
- #endif // #ifndef _THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_
881
+ #endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_
882
882
 
883
883
 
884
884
  // LICENSE_CHANGE_END
@@ -1129,7 +1129,7 @@ public:
1129
1129
  }
1130
1130
  } // duckdb_apache::thrift::transport
1131
1131
 
1132
- #endif // #ifndef _THRIFT_TRANSPORT_TTRANSPORT_H_
1132
+ #endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_
1133
1133
 
1134
1134
 
1135
1135
  // LICENSE_CHANGE_END
@@ -1159,8 +1159,8 @@ public:
1159
1159
  * under the License.
1160
1160
  */
1161
1161
 
1162
- #ifndef _THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_
1163
- #define _THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ 1
1162
+ #ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_
1163
+ #define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ 1
1164
1164
 
1165
1165
  #include <string>
1166
1166
 
@@ -1244,7 +1244,7 @@ protected:
1244
1244
  }
1245
1245
  } // duckdb_apache::thrift::protocol
1246
1246
 
1247
- #endif // #ifndef _THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_
1247
+ #endif // #ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_
1248
1248
 
1249
1249
 
1250
1250
  // LICENSE_CHANGE_END
@@ -1267,37 +1267,39 @@ protected:
1267
1267
  // but that doesn't work.
1268
1268
  // For a pretty in-depth explanation of the problem, see
1269
1269
  // http://cellperformance.beyond3d.com/articles/2006/06/understanding-strict-aliasing.html
1270
+ namespace duckdb_apache { namespace thrift {
1270
1271
  template <typename To, typename From>
1271
1272
  static inline To bitwise_cast(From from) {
1272
- static_assert(sizeof(From) == sizeof(To), "sizeof(From) == sizeof(To)");
1273
-
1274
- // BAD!!! These are all broken with -O2.
1275
- //return *reinterpret_cast<To*>(&from); // BAD!!!
1276
- //return *static_cast<To*>(static_cast<void*>(&from)); // BAD!!!
1277
- //return *(To*)(void*)&from; // BAD!!!
1278
-
1279
- // Super clean and partially blessed by section 3.9 of the standard.
1280
- //unsigned char c[sizeof(from)];
1281
- //memcpy(c, &from, sizeof(from));
1282
- //To to;
1283
- //memcpy(&to, c, sizeof(c));
1284
- //return to;
1285
-
1286
- // Slightly more questionable.
1287
- // Same code emitted by GCC.
1288
- //To to;
1289
- //memcpy(&to, &from, sizeof(from));
1290
- //return to;
1291
-
1292
- // Technically undefined, but almost universally supported,
1293
- // and the most efficient implementation.
1294
- union {
1295
- From f;
1296
- To t;
1297
- } u;
1298
- u.f = from;
1299
- return u.t;
1273
+ static_assert(sizeof(From) == sizeof(To), "sizeof(From) == sizeof(To)");
1274
+
1275
+ // BAD!!! These are all broken with -O2.
1276
+ // return *reinterpret_cast<To*>(&from); // BAD!!!
1277
+ // return *static_cast<To*>(static_cast<void*>(&from)); // BAD!!!
1278
+ // return *(To*)(void*)&from; // BAD!!!
1279
+
1280
+ // Super clean and partially blessed by section 3.9 of the standard.
1281
+ // unsigned char c[sizeof(from)];
1282
+ // memcpy(c, &from, sizeof(from));
1283
+ // To to;
1284
+ // memcpy(&to, c, sizeof(c));
1285
+ // return to;
1286
+
1287
+ // Slightly more questionable.
1288
+ // Same code emitted by GCC.
1289
+ // To to;
1290
+ // memcpy(&to, &from, sizeof(from));
1291
+ // return to;
1292
+
1293
+ // Technically undefined, but almost universally supported,
1294
+ // and the most efficient implementation.
1295
+ union {
1296
+ From f;
1297
+ To t;
1298
+ } u;
1299
+ u.f = from;
1300
+ return u.t;
1300
1301
  }
1302
+ }} // namespace duckdb_apache::thrift
1301
1303
 
1302
1304
 
1303
1305
  #ifdef HAVE_SYS_PARAM_H
@@ -1983,7 +1985,7 @@ uint32_t skip(Protocol_& prot, TType type) {
1983
1985
 
1984
1986
  }}} // duckdb_apache::thrift::protocol
1985
1987
 
1986
- #endif // #define _THRIFT_PROTOCOL_TPROTOCOL_H_ 1
1988
+ #endif // #define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ 1
1987
1989
 
1988
1990
 
1989
1991
  // LICENSE_CHANGE_END
@@ -2001,7 +2003,7 @@ public:
2001
2003
  }
2002
2004
  } // duckdb_apache::thrift
2003
2005
 
2004
- #endif // #ifndef _THRIFT_TBASE_H_
2006
+ #endif // #ifndef _DUCKDB_THRIFT_TBASE_H_
2005
2007
 
2006
2008
 
2007
2009
  // LICENSE_CHANGE_END
@@ -4625,7 +4627,7 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
4625
4627
  // LICENSE_CHANGE_END
4626
4628
 
4627
4629
 
4628
-
4630
+ #include <list>
4629
4631
 
4630
4632
 
4631
4633
  // LICENSE_CHANGE_BEGIN
@@ -4651,8 +4653,8 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
4651
4653
  * under the License.
4652
4654
  */
4653
4655
 
4654
- #ifndef _THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_
4655
- #define _THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_ 1
4656
+ #ifndef _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_
4657
+ #define _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_ 1
4656
4658
 
4657
4659
 
4658
4660
 
@@ -4679,8 +4681,8 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
4679
4681
  * under the License.
4680
4682
  */
4681
4683
 
4682
- #ifndef _THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_
4683
- #define _THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1
4684
+ #ifndef _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_
4685
+ #define _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1
4684
4686
 
4685
4687
 
4686
4688
 
@@ -5172,7 +5174,7 @@ protected:
5172
5174
  }
5173
5175
  } // duckdb_apache::thrift::protocol
5174
5176
 
5175
- #endif // #define _THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1
5177
+ #endif // #define _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1
5176
5178
 
5177
5179
 
5178
5180
  // LICENSE_CHANGE_END
@@ -5441,8 +5443,8 @@ typedef TCompactProtocolFactoryT<TTransport> TCompactProtocolFactory;
5441
5443
  * specific language governing permissions and limitations
5442
5444
  * under the License.
5443
5445
  */
5444
- #ifndef _THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_
5445
- #define _THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ 1
5446
+ #ifndef _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_
5447
+ #define _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ 1
5446
5448
 
5447
5449
  #include <limits>
5448
5450
 
@@ -6248,7 +6250,7 @@ TType TCompactProtocolT<Transport_>::getTType(int8_t type) {
6248
6250
 
6249
6251
  }}} // duckdb_apache::thrift::protocol
6250
6252
 
6251
- #endif // _THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_
6253
+ #endif // _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_
6252
6254
 
6253
6255
 
6254
6256
  // LICENSE_CHANGE_END
@@ -6284,8 +6286,8 @@ TType TCompactProtocolT<Transport_>::getTType(int8_t type) {
6284
6286
  * under the License.
6285
6287
  */
6286
6288
 
6287
- #ifndef _THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_
6288
- #define _THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ 1
6289
+ #ifndef _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_
6290
+ #define _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ 1
6289
6291
 
6290
6292
  #include <cstdlib>
6291
6293
  #include <cstddef>
@@ -6319,8 +6321,8 @@ TType TCompactProtocolT<Transport_>::getTType(int8_t type) {
6319
6321
  * under the License.
6320
6322
  */
6321
6323
 
6322
- #ifndef _THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_
6323
- #define _THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ 1
6324
+ #ifndef _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_
6325
+ #define _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ 1
6324
6326
 
6325
6327
 
6326
6328
 
@@ -6439,7 +6441,7 @@ protected:
6439
6441
  }
6440
6442
  } // duckdb_apache::thrift::transport
6441
6443
 
6442
- #endif // #ifndef _THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_
6444
+ #endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_
6443
6445
 
6444
6446
 
6445
6447
  // LICENSE_CHANGE_END
@@ -6902,7 +6904,7 @@ protected:
6902
6904
  }
6903
6905
  } // duckdb_apache::thrift::transport
6904
6906
 
6905
- #endif // #ifndef _THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_
6907
+ #endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_
6906
6908
 
6907
6909
 
6908
6910
  // LICENSE_CHANGE_END
@@ -6916,31 +6918,174 @@ protected:
6916
6918
 
6917
6919
  namespace duckdb {
6918
6920
 
6921
+ // A ReadHead for prefetching data in a specific range
6922
+ struct ReadHead {
6923
+ ReadHead(idx_t location, uint64_t size) : location(location), size(size) {};
6924
+ // Hint info
6925
+ idx_t location;
6926
+ uint64_t size;
6927
+
6928
+ // Current info
6929
+ unique_ptr<AllocatedData> data;
6930
+ bool data_isset = false;
6931
+
6932
+ idx_t GetEnd() const {
6933
+ return size + location;
6934
+ }
6935
+
6936
+ void Allocate(Allocator &allocator) {
6937
+ data = allocator.Allocate(size);
6938
+ }
6939
+ };
6940
+
6941
+ // Comparator for ReadHeads that are either overlapping, adjacent, or within ALLOW_GAP bytes from each other
6942
+ struct ReadHeadComparator {
6943
+ static constexpr uint64_t ALLOW_GAP = 1 << 14; // 16 KiB
6944
+ bool operator()(const ReadHead *a, const ReadHead *b) const {
6945
+ auto a_start = a->location;
6946
+ auto a_end = a->location + a->size;
6947
+ auto b_start = b->location;
6948
+
6949
+ if (a_end <= NumericLimits<idx_t>::Maximum() - ALLOW_GAP) {
6950
+ a_end += ALLOW_GAP;
6951
+ }
6952
+
6953
+ return a_start < b_start && a_end < b_start;
6954
+ }
6955
+ };
6956
+
6957
+ // Two-step read ahead buffer
6958
+ // 1: register all ranges that will be read, merging ranges that are consecutive
6959
+ // 2: prefetch all registered ranges
6960
+ struct ReadAheadBuffer {
6961
+ ReadAheadBuffer(Allocator &allocator, FileHandle &handle, FileOpener &opener)
6962
+ : allocator(allocator), handle(handle), file_opener(opener) {
6963
+ }
6964
+
6965
+ // The list of read heads
6966
+ std::list<ReadHead> read_heads;
6967
+ // Set for merging consecutive ranges
6968
+ std::set<ReadHead *, ReadHeadComparator> merge_set;
6969
+
6970
+ Allocator &allocator;
6971
+ FileHandle &handle;
6972
+ FileOpener &file_opener;
6973
+
6974
+ idx_t total_size = 0;
6975
+
6976
+ // Add a read head to the prefetching list
6977
+ void AddReadHead(idx_t pos, uint64_t len, bool merge_buffers = true) {
6978
+ // Attempt to merge with existing
6979
+ if (merge_buffers) {
6980
+ ReadHead new_read_head {pos, len};
6981
+ auto lookup_set = merge_set.find(&new_read_head);
6982
+ if (lookup_set != merge_set.end()) {
6983
+ auto existing_head = *lookup_set;
6984
+ auto new_start = MinValue<idx_t>(existing_head->location, new_read_head.location);
6985
+ auto new_length = MaxValue<idx_t>(existing_head->GetEnd(), new_read_head.GetEnd()) - new_start;
6986
+ existing_head->location = new_start;
6987
+ existing_head->size = new_length;
6988
+ return;
6989
+ }
6990
+ }
6991
+
6992
+ read_heads.emplace_front(ReadHead(pos, len));
6993
+ total_size += len;
6994
+ auto &read_head = read_heads.front();
6995
+
6996
+ if (merge_buffers) {
6997
+ merge_set.insert(&read_head);
6998
+ }
6999
+
7000
+ if (read_head.GetEnd() > handle.GetFileSize()) {
7001
+ throw std::runtime_error("Prefetch registered for bytes outside file");
7002
+ }
7003
+ }
7004
+
7005
+ // Returns the relevant read head
7006
+ ReadHead *GetReadHead(idx_t pos) {
7007
+ for (auto &read_head : read_heads) {
7008
+ if (pos >= read_head.location && pos < read_head.GetEnd()) {
7009
+ return &read_head;
7010
+ }
7011
+ }
7012
+ return nullptr;
7013
+ }
7014
+
7015
+ // Prefetch all read heads
7016
+ void Prefetch() {
7017
+ for (auto &read_head : read_heads) {
7018
+ read_head.Allocate(allocator);
7019
+
7020
+ if (read_head.GetEnd() > handle.GetFileSize()) {
7021
+ throw std::runtime_error("Prefetch registered requested for bytes outside file");
7022
+ }
7023
+
7024
+ handle.Read(read_head.data->get(), read_head.size, read_head.location);
7025
+ read_head.data_isset = true;
7026
+ }
7027
+ }
7028
+ };
7029
+
6919
7030
  class ThriftFileTransport : public duckdb_apache::thrift::transport::TVirtualTransport<ThriftFileTransport> {
6920
7031
  public:
6921
- ThriftFileTransport(Allocator &allocator, FileHandle &handle_p)
6922
- : allocator(allocator), handle(handle_p), location(0) {
7032
+ static constexpr uint64_t PREFETCH_FALLBACK_BUFFERSIZE = 1000000;
7033
+
7034
+ ThriftFileTransport(Allocator &allocator, FileHandle &handle_p, FileOpener &opener, bool prefetch_mode_p)
7035
+ : handle(handle_p), location(0), allocator(allocator), ra_buffer(ReadAheadBuffer(allocator, handle_p, opener)),
7036
+ prefetch_mode(prefetch_mode_p) {
6923
7037
  }
6924
7038
 
6925
7039
  uint32_t read(uint8_t *buf, uint32_t len) {
6926
- if (prefetched_data && location >= prefetch_location &&
6927
- location + len < prefetch_location + prefetched_data->GetSize()) {
6928
- memcpy(buf, prefetched_data->get() + location - prefetch_location, len);
7040
+ auto prefetch_buffer = ra_buffer.GetReadHead(location);
7041
+ if (prefetch_buffer != nullptr && location - prefetch_buffer->location + len <= prefetch_buffer->size) {
7042
+ D_ASSERT(location - prefetch_buffer->location + len <= prefetch_buffer->size);
7043
+
7044
+ if (!prefetch_buffer->data_isset) {
7045
+ prefetch_buffer->Allocate(allocator);
7046
+ handle.Read(prefetch_buffer->data->get(), prefetch_buffer->size, prefetch_buffer->location);
7047
+ prefetch_buffer->data_isset = true;
7048
+ }
7049
+ memcpy(buf, prefetch_buffer->data->get() + location - prefetch_buffer->location, len);
6929
7050
  } else {
6930
- handle.Read(buf, len, location);
7051
+ if (prefetch_mode && len < PREFETCH_FALLBACK_BUFFERSIZE && len > 0) {
7052
+ Prefetch(location, MinValue<uint64_t>(PREFETCH_FALLBACK_BUFFERSIZE, handle.GetFileSize() - location));
7053
+ auto prefetch_buffer_fallback = ra_buffer.GetReadHead(location);
7054
+ D_ASSERT(location - prefetch_buffer_fallback->location + len <= prefetch_buffer_fallback->size);
7055
+ memcpy(buf, prefetch_buffer_fallback->data->get() + location - prefetch_buffer_fallback->location, len);
7056
+ } else {
7057
+ handle.Read(buf, len, location);
7058
+ }
6931
7059
  }
6932
7060
  location += len;
6933
7061
  return len;
6934
7062
  }
6935
7063
 
6936
- void Prefetch(idx_t pos, idx_t len) {
6937
- prefetch_location = pos;
6938
- prefetched_data = allocator.Allocate(len);
6939
- handle.Read(prefetched_data->get(), len, prefetch_location);
7064
+ // Prefetch a single buffer
7065
+ void Prefetch(idx_t pos, uint64_t len) {
7066
+ RegisterPrefetch(pos, len, false);
7067
+ FinalizeRegistration();
7068
+ PrefetchRegistered();
7069
+ }
7070
+
7071
+ // Register a buffer for prefetching
7072
+ void RegisterPrefetch(idx_t pos, uint64_t len, bool can_merge = true) {
7073
+ ra_buffer.AddReadHead(pos, len, can_merge);
7074
+ }
7075
+
7076
+ // Prevents any further merges, should be called before PrefetchRegistered
7077
+ void FinalizeRegistration() {
7078
+ ra_buffer.merge_set.clear();
7079
+ }
7080
+
7081
+ // Prefetch all previously registered ranges
7082
+ void PrefetchRegistered() {
7083
+ ra_buffer.Prefetch();
6940
7084
  }
6941
7085
 
6942
7086
  void ClearPrefetch() {
6943
- prefetched_data.reset();
7087
+ ra_buffer.read_heads.clear();
7088
+ ra_buffer.merge_set.clear();
6944
7089
  }
6945
7090
 
6946
7091
  void SetLocation(idx_t location_p) {
@@ -6955,12 +7100,17 @@ public:
6955
7100
  }
6956
7101
 
6957
7102
  private:
6958
- Allocator &allocator;
6959
7103
  FileHandle &handle;
6960
7104
  idx_t location;
6961
7105
 
6962
- unique_ptr<AllocatedData> prefetched_data;
6963
- idx_t prefetch_location;
7106
+ Allocator &allocator;
7107
+
7108
+ // Multi-buffer prefetch
7109
+ ReadAheadBuffer ra_buffer;
7110
+
7111
+ // Whether the prefetch mode is enabled. In this mode the DirectIO flag of the handle will be set and the parquet
7112
+ // reader will manage the read buffering.
7113
+ bool prefetch_mode;
6964
7114
  };
6965
7115
 
6966
7116
  } // namespace duckdb
@@ -7417,8 +7567,13 @@ public:
7417
7567
  idx_t MaxDefine() const;
7418
7568
  idx_t MaxRepeat() const;
7419
7569
 
7570
+ virtual idx_t FileOffset() const;
7571
+ virtual uint64_t TotalCompressedSize();
7420
7572
  virtual idx_t GroupRowsAvailable();
7421
7573
 
7574
+ // register the range this reader will touch for prefetching
7575
+ virtual void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge);
7576
+
7422
7577
  virtual unique_ptr<BaseStatistics> Stats(const std::vector<ColumnChunk> &columns);
7423
7578
 
7424
7579
  protected:
@@ -7433,6 +7588,9 @@ protected:
7433
7588
  virtual void DictReference(Vector &result);
7434
7589
  virtual void PlainReference(shared_ptr<ByteBuffer>, Vector &result);
7435
7590
 
7591
+ // applies any skips that were registered using Skip()
7592
+ virtual void ApplyPendingSkips(idx_t num_values);
7593
+
7436
7594
  bool HasDefines() {
7437
7595
  return max_define > 0;
7438
7596
  }
@@ -7451,13 +7609,15 @@ protected:
7451
7609
  ParquetReader &reader;
7452
7610
  LogicalType type;
7453
7611
 
7612
+ idx_t pending_skips = 0;
7613
+
7454
7614
  private:
7455
7615
  void PrepareRead(parquet_filter_t &filter);
7456
7616
  void PreparePage(idx_t compressed_page_size, idx_t uncompressed_page_size);
7457
7617
  void PrepareDataPage(PageHeader &page_hdr);
7458
7618
  void PreparePageV2(PageHeader &page_hdr);
7459
7619
 
7460
- const duckdb_parquet::format::ColumnChunk *chunk;
7620
+ const duckdb_parquet::format::ColumnChunk *chunk = nullptr;
7461
7621
 
7462
7622
  duckdb_apache::thrift::protocol::TProtocol *protocol;
7463
7623
  idx_t page_rows_available;
@@ -7545,6 +7705,11 @@ class ChunkCollection;
7545
7705
  class BaseStatistics;
7546
7706
  class TableFilterSet;
7547
7707
 
7708
+ struct ParquetReaderPrefetchConfig {
7709
+ // Minimum fraction (0.0-1.0) of the data in a row group span that must be scanned to enable whole-group prefetch
7710
+ static constexpr double WHOLE_GROUP_PREFETCH_MINIMUM_SCAN = 0.95;
7711
+ };
7712
+
7548
7713
  struct ParquetReaderScanState {
7549
7714
  vector<idx_t> group_idx_list;
7550
7715
  int64_t current_group;
@@ -7560,6 +7725,9 @@ struct ParquetReaderScanState {
7560
7725
 
7561
7726
  ResizeableBuffer define_buf;
7562
7727
  ResizeableBuffer repeat_buf;
7728
+
7729
+ bool prefetch_mode = false;
7730
+ bool current_group_prefetched = false;
7563
7731
  };
7564
7732
 
7565
7733
  struct ParquetOptions {
@@ -7568,6 +7736,8 @@ struct ParquetOptions {
7568
7736
  explicit ParquetOptions(ClientContext &context);
7569
7737
 
7570
7738
  bool binary_as_string = false;
7739
+ bool filename = false;
7740
+ bool hive_partitioning = false;
7571
7741
  };
7572
7742
 
7573
7743
  class ParquetReader {
@@ -7624,6 +7794,10 @@ private:
7624
7794
  idx_t depth, idx_t max_define, idx_t max_repeat,
7625
7795
  idx_t &next_schema_idx, idx_t &next_file_idx);
7626
7796
  const duckdb_parquet::format::RowGroup &GetGroup(ParquetReaderScanState &state);
7797
+ uint64_t GetGroupCompressedSize(ParquetReaderScanState &state);
7798
+ idx_t GetGroupOffset(ParquetReaderScanState &state);
7799
+ // Group span is the distance between the min page offset and the max page offset plus the max page compressed size
7800
+ uint64_t GetGroupSpan(ParquetReaderScanState &state);
7627
7801
  void PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx);
7628
7802
  LogicalType DeriveLogicalType(const SchemaElement &s_ele);
7629
7803
 
@@ -7681,7 +7855,7 @@ namespace duckdb {
7681
7855
  class BufferedSerializer;
7682
7856
  class ParquetWriter;
7683
7857
  class ColumnWriterPageState;
7684
- class StandardColumnWriterState;
7858
+ class BasicColumnWriterState;
7685
7859
 
7686
7860
  class ColumnWriterState {
7687
7861
  public:
@@ -7703,9 +7877,6 @@ public:
7703
7877
  };
7704
7878
 
7705
7879
  class ColumnWriter {
7706
- //! We limit the uncompressed page size to 100MB
7707
- // The max size in Parquet is 2GB, but we choose a more conservative limit
7708
- static constexpr const idx_t MAX_UNCOMPRESSED_PAGE_SIZE = 100000000;
7709
7880
 
7710
7881
  public:
7711
7882
  ColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path, idx_t max_repeat,
@@ -7729,46 +7900,35 @@ public:
7729
7900
  idx_t max_repeat = 0, idx_t max_define = 1,
7730
7901
  bool can_have_nulls = true);
7731
7902
 
7732
- virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group);
7733
- virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count);
7903
+ virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) = 0;
7734
7904
 
7735
- virtual void BeginWrite(ColumnWriterState &state);
7736
- virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count);
7737
- virtual void FinalizeWrite(ColumnWriterState &state);
7738
-
7739
- protected:
7740
- void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, idx_t count,
7741
- uint16_t define_value, uint16_t null_value);
7742
- void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat);
7743
-
7744
- void WriteLevels(Serializer &temp_writer, const vector<uint16_t> &levels, idx_t max_value, idx_t start_offset,
7745
- idx_t count);
7905
+ //! Indicates whether the writer needs to analyze the data before preparing it
7906
+ virtual bool HasAnalyze() {
7907
+ return false;
7908
+ }
7746
7909
 
7747
- virtual duckdb_parquet::format::Encoding::type GetEncoding();
7910
+ virtual void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) {
7911
+ throw NotImplementedException("Writer does not need analysis");
7912
+ }
7748
7913
 
7749
- void NextPage(ColumnWriterState &state_p);
7750
- void FlushPage(ColumnWriterState &state_p);
7751
- void WriteDictionary(ColumnWriterState &state_p, unique_ptr<BufferedSerializer> temp_writer, idx_t row_count);
7914
+ //! Called after all data has been passed to Analyze
7915
+ virtual void FinalizeAnalyze(ColumnWriterState &state) {
7916
+ throw NotImplementedException("Writer does not need analysis");
7917
+ }
7752
7918
 
7753
- virtual void FlushDictionary(ColumnWriterState &state, ColumnWriterStatistics *stats);
7919
+ virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) = 0;
7754
7920
 
7755
- //! Initializes the state used to track statistics during writing. Only used for scalar types.
7756
- virtual unique_ptr<ColumnWriterStatistics> InitializeStatsState();
7757
- //! Retrieves the row size of a vector at the specified location. Only used for scalar types.
7758
- virtual idx_t GetRowSize(Vector &vector, idx_t index);
7759
- //! Writes a (subset of a) vector to the specified serializer. Only used for scalar types.
7760
- virtual void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state,
7761
- Vector &vector, idx_t chunk_start, idx_t chunk_end);
7921
+ virtual void BeginWrite(ColumnWriterState &state) = 0;
7922
+ virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count) = 0;
7923
+ virtual void FinalizeWrite(ColumnWriterState &state) = 0;
7762
7924
 
7763
- //! Initialize the writer for a specific page. Only used for scalar types.
7764
- virtual unique_ptr<ColumnWriterPageState> InitializePageState();
7765
- //! Flushes the writer for a specific page. Only used for scalar types.
7766
- virtual void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state);
7925
+ protected:
7926
+ void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, idx_t count,
7927
+ uint16_t define_value, uint16_t null_value);
7928
+ void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat);
7767
7929
 
7768
7930
  void CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
7769
7931
  unique_ptr<data_t[]> &compressed_buf);
7770
-
7771
- void SetParquetStatistics(StandardColumnWriterState &state, duckdb_parquet::format::ColumnChunk &column);
7772
7932
  };
7773
7933
 
7774
7934
  } // namespace duckdb
@@ -7781,6 +7941,7 @@ class FileOpener;
7781
7941
 
7782
7942
  class ParquetWriter {
7783
7943
  friend class ColumnWriter;
7944
+ friend class BasicColumnWriter;
7784
7945
  friend class ListColumnWriter;
7785
7946
  friend class StructColumnWriter;
7786
7947