duckdb 0.3.5-dev987.0 → 0.4.1-dev102.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4625,7 +4625,7 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
4625
4625
  // LICENSE_CHANGE_END
4626
4626
 
4627
4627
 
4628
-
4628
+ #include <list>
4629
4629
 
4630
4630
 
4631
4631
  // LICENSE_CHANGE_BEGIN
@@ -6916,31 +6916,174 @@ protected:
6916
6916
 
6917
6917
  namespace duckdb {
6918
6918
 
6919
+ // A ReadHead for prefetching data in a specific range
6920
+ struct ReadHead {
6921
+ ReadHead(idx_t location, uint64_t size) : location(location), size(size) {};
6922
+ // Hint info
6923
+ idx_t location;
6924
+ uint64_t size;
6925
+
6926
+ // Current info
6927
+ unique_ptr<AllocatedData> data;
6928
+ bool data_isset = false;
6929
+
6930
+ idx_t GetEnd() const {
6931
+ return size + location;
6932
+ }
6933
+
6934
+ void Allocate(Allocator &allocator) {
6935
+ data = allocator.Allocate(size);
6936
+ }
6937
+ };
6938
+
6939
+ // Comparator for ReadHeads that are either overlapping, adjacent, or within ALLOW_GAP bytes from each other
6940
+ struct ReadHeadComparator {
6941
+ static constexpr uint64_t ALLOW_GAP = 1 << 14; // 16 KiB
6942
+ bool operator()(const ReadHead *a, const ReadHead *b) const {
6943
+ auto a_start = a->location;
6944
+ auto a_end = a->location + a->size;
6945
+ auto b_start = b->location;
6946
+
6947
+ if (a_end <= NumericLimits<idx_t>::Maximum() - ALLOW_GAP) {
6948
+ a_end += ALLOW_GAP;
6949
+ }
6950
+
6951
+ return a_start < b_start && a_end < b_start;
6952
+ }
6953
+ };
6954
+
6955
+ // Two-step read ahead buffer
6956
+ // 1: register all ranges that will be read, merging ranges that are consecutive
6957
+ // 2: prefetch all registered ranges
6958
+ struct ReadAheadBuffer {
6959
+ ReadAheadBuffer(Allocator &allocator, FileHandle &handle, FileOpener &opener)
6960
+ : allocator(allocator), handle(handle), file_opener(opener) {
6961
+ }
6962
+
6963
+ // The list of read heads
6964
+ std::list<ReadHead> read_heads;
6965
+ // Set for merging consecutive ranges
6966
+ std::set<ReadHead *, ReadHeadComparator> merge_set;
6967
+
6968
+ Allocator &allocator;
6969
+ FileHandle &handle;
6970
+ FileOpener &file_opener;
6971
+
6972
+ idx_t total_size = 0;
6973
+
6974
+ // Add a read head to the prefetching list
6975
+ void AddReadHead(idx_t pos, uint64_t len, bool merge_buffers = true) {
6976
+ // Attempt to merge with existing
6977
+ if (merge_buffers) {
6978
+ ReadHead new_read_head {pos, len};
6979
+ auto lookup_set = merge_set.find(&new_read_head);
6980
+ if (lookup_set != merge_set.end()) {
6981
+ auto existing_head = *lookup_set;
6982
+ auto new_start = MinValue<idx_t>(existing_head->location, new_read_head.location);
6983
+ auto new_length = MaxValue<idx_t>(existing_head->GetEnd(), new_read_head.GetEnd()) - new_start;
6984
+ existing_head->location = new_start;
6985
+ existing_head->size = new_length;
6986
+ return;
6987
+ }
6988
+ }
6989
+
6990
+ read_heads.emplace_front(ReadHead(pos, len));
6991
+ total_size += len;
6992
+ auto &read_head = read_heads.front();
6993
+
6994
+ if (merge_buffers) {
6995
+ merge_set.insert(&read_head);
6996
+ }
6997
+
6998
+ if (read_head.GetEnd() > handle.GetFileSize()) {
6999
+ throw std::runtime_error("Prefetch registered for bytes outside file");
7000
+ }
7001
+ }
7002
+
7003
+ // Returns the relevant read head
7004
+ ReadHead *GetReadHead(idx_t pos) {
7005
+ for (auto &read_head : read_heads) {
7006
+ if (pos >= read_head.location && pos < read_head.GetEnd()) {
7007
+ return &read_head;
7008
+ }
7009
+ }
7010
+ return nullptr;
7011
+ }
7012
+
7013
+ // Prefetch all read heads
7014
+ void Prefetch() {
7015
+ for (auto &read_head : read_heads) {
7016
+ read_head.Allocate(allocator);
7017
+
7018
+ if (read_head.GetEnd() > handle.GetFileSize()) {
7019
+ throw std::runtime_error("Prefetch requested for bytes outside file");
7020
+ }
7021
+
7022
+ handle.Read(read_head.data->get(), read_head.size, read_head.location);
7023
+ read_head.data_isset = true;
7024
+ }
7025
+ }
7026
+ };
7027
+
6919
7028
  class ThriftFileTransport : public duckdb_apache::thrift::transport::TVirtualTransport<ThriftFileTransport> {
6920
7029
  public:
6921
- ThriftFileTransport(Allocator &allocator, FileHandle &handle_p)
6922
- : allocator(allocator), handle(handle_p), location(0) {
7030
+ static constexpr uint64_t PREFETCH_FALLBACK_BUFFERSIZE = 1000000;
7031
+
7032
+ ThriftFileTransport(Allocator &allocator, FileHandle &handle_p, FileOpener &opener, bool prefetch_mode_p)
7033
+ : handle(handle_p), location(0), allocator(allocator), ra_buffer(ReadAheadBuffer(allocator, handle_p, opener)),
7034
+ prefetch_mode(prefetch_mode_p) {
6923
7035
  }
6924
7036
 
6925
7037
  uint32_t read(uint8_t *buf, uint32_t len) {
6926
- if (prefetched_data && location >= prefetch_location &&
6927
- location + len < prefetch_location + prefetched_data->GetSize()) {
6928
- memcpy(buf, prefetched_data->get() + location - prefetch_location, len);
7038
+ auto prefetch_buffer = ra_buffer.GetReadHead(location);
7039
+ if (prefetch_buffer != nullptr && location - prefetch_buffer->location + len <= prefetch_buffer->size) {
7040
+ D_ASSERT(location - prefetch_buffer->location + len <= prefetch_buffer->size);
7041
+
7042
+ if (!prefetch_buffer->data_isset) {
7043
+ prefetch_buffer->Allocate(allocator);
7044
+ handle.Read(prefetch_buffer->data->get(), prefetch_buffer->size, prefetch_buffer->location);
7045
+ prefetch_buffer->data_isset = true;
7046
+ }
7047
+ memcpy(buf, prefetch_buffer->data->get() + location - prefetch_buffer->location, len);
6929
7048
  } else {
6930
- handle.Read(buf, len, location);
7049
+ if (prefetch_mode && len < PREFETCH_FALLBACK_BUFFERSIZE && len > 0) {
7050
+ Prefetch(location, MinValue<uint64_t>(PREFETCH_FALLBACK_BUFFERSIZE, handle.GetFileSize() - location));
7051
+ auto prefetch_buffer_fallback = ra_buffer.GetReadHead(location);
7052
+ D_ASSERT(location - prefetch_buffer_fallback->location + len <= prefetch_buffer_fallback->size);
7053
+ memcpy(buf, prefetch_buffer_fallback->data->get() + location - prefetch_buffer_fallback->location, len);
7054
+ } else {
7055
+ handle.Read(buf, len, location);
7056
+ }
6931
7057
  }
6932
7058
  location += len;
6933
7059
  return len;
6934
7060
  }
6935
7061
 
6936
- void Prefetch(idx_t pos, idx_t len) {
6937
- prefetch_location = pos;
6938
- prefetched_data = allocator.Allocate(len);
6939
- handle.Read(prefetched_data->get(), len, prefetch_location);
7062
+ // Prefetch a single buffer
7063
+ void Prefetch(idx_t pos, uint64_t len) {
7064
+ RegisterPrefetch(pos, len, false);
7065
+ FinalizeRegistration();
7066
+ PrefetchRegistered();
7067
+ }
7068
+
7069
+ // Register a buffer for prefetching
7070
+ void RegisterPrefetch(idx_t pos, uint64_t len, bool can_merge = true) {
7071
+ ra_buffer.AddReadHead(pos, len, can_merge);
7072
+ }
7073
+
7074
+ // Prevents any further merges, should be called before PrefetchRegistered
7075
+ void FinalizeRegistration() {
7076
+ ra_buffer.merge_set.clear();
7077
+ }
7078
+
7079
+ // Prefetch all previously registered ranges
7080
+ void PrefetchRegistered() {
7081
+ ra_buffer.Prefetch();
6940
7082
  }
6941
7083
 
6942
7084
  void ClearPrefetch() {
6943
- prefetched_data.reset();
7085
+ ra_buffer.read_heads.clear();
7086
+ ra_buffer.merge_set.clear();
6944
7087
  }
6945
7088
 
6946
7089
  void SetLocation(idx_t location_p) {
@@ -6955,12 +7098,17 @@ public:
6955
7098
  }
6956
7099
 
6957
7100
  private:
6958
- Allocator &allocator;
6959
7101
  FileHandle &handle;
6960
7102
  idx_t location;
6961
7103
 
6962
- unique_ptr<AllocatedData> prefetched_data;
6963
- idx_t prefetch_location;
7104
+ Allocator &allocator;
7105
+
7106
+ // Multi-buffer prefetch
7107
+ ReadAheadBuffer ra_buffer;
7108
+
7109
+ // Whether the prefetch mode is enabled. In this mode the DirectIO flag of the handle will be set and the parquet
7110
+ // reader will manage the read buffering.
7111
+ bool prefetch_mode;
6964
7112
  };
6965
7113
 
6966
7114
  } // namespace duckdb
@@ -7417,8 +7565,13 @@ public:
7417
7565
  idx_t MaxDefine() const;
7418
7566
  idx_t MaxRepeat() const;
7419
7567
 
7568
+ virtual idx_t FileOffset() const;
7569
+ virtual uint64_t TotalCompressedSize();
7420
7570
  virtual idx_t GroupRowsAvailable();
7421
7571
 
7572
+ // register the range this reader will touch for prefetching
7573
+ virtual void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge);
7574
+
7422
7575
  virtual unique_ptr<BaseStatistics> Stats(const std::vector<ColumnChunk> &columns);
7423
7576
 
7424
7577
  protected:
@@ -7433,6 +7586,9 @@ protected:
7433
7586
  virtual void DictReference(Vector &result);
7434
7587
  virtual void PlainReference(shared_ptr<ByteBuffer>, Vector &result);
7435
7588
 
7589
+ // applies any skips that were registered using Skip()
7590
+ virtual void ApplyPendingSkips(idx_t num_values);
7591
+
7436
7592
  bool HasDefines() {
7437
7593
  return max_define > 0;
7438
7594
  }
@@ -7451,13 +7607,15 @@ protected:
7451
7607
  ParquetReader &reader;
7452
7608
  LogicalType type;
7453
7609
 
7610
+ idx_t pending_skips = 0;
7611
+
7454
7612
  private:
7455
7613
  void PrepareRead(parquet_filter_t &filter);
7456
7614
  void PreparePage(idx_t compressed_page_size, idx_t uncompressed_page_size);
7457
7615
  void PrepareDataPage(PageHeader &page_hdr);
7458
7616
  void PreparePageV2(PageHeader &page_hdr);
7459
7617
 
7460
- const duckdb_parquet::format::ColumnChunk *chunk;
7618
+ const duckdb_parquet::format::ColumnChunk *chunk = nullptr;
7461
7619
 
7462
7620
  duckdb_apache::thrift::protocol::TProtocol *protocol;
7463
7621
  idx_t page_rows_available;
@@ -7545,6 +7703,11 @@ class ChunkCollection;
7545
7703
  class BaseStatistics;
7546
7704
  class TableFilterSet;
7547
7705
 
7706
+ struct ParquetReaderPrefetchConfig {
7707
+ // Percentage of data in a row group span that should be scanned for enabling whole group prefetch
7708
+ static constexpr double WHOLE_GROUP_PREFETCH_MINIMUM_SCAN = 0.95;
7709
+ };
7710
+
7548
7711
  struct ParquetReaderScanState {
7549
7712
  vector<idx_t> group_idx_list;
7550
7713
  int64_t current_group;
@@ -7560,6 +7723,9 @@ struct ParquetReaderScanState {
7560
7723
 
7561
7724
  ResizeableBuffer define_buf;
7562
7725
  ResizeableBuffer repeat_buf;
7726
+
7727
+ bool prefetch_mode = false;
7728
+ bool current_group_prefetched = false;
7563
7729
  };
7564
7730
 
7565
7731
  struct ParquetOptions {
@@ -7624,6 +7790,10 @@ private:
7624
7790
  idx_t depth, idx_t max_define, idx_t max_repeat,
7625
7791
  idx_t &next_schema_idx, idx_t &next_file_idx);
7626
7792
  const duckdb_parquet::format::RowGroup &GetGroup(ParquetReaderScanState &state);
7793
+ uint64_t GetGroupCompressedSize(ParquetReaderScanState &state);
7794
+ idx_t GetGroupOffset(ParquetReaderScanState &state);
7795
+ // Group span is the distance between the min page offset and the max page offset plus the max page compressed size
7796
+ uint64_t GetGroupSpan(ParquetReaderScanState &state);
7627
7797
  void PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx);
7628
7798
  LogicalType DeriveLogicalType(const SchemaElement &s_ele);
7629
7799
 
@@ -0,0 +1,82 @@
1
+ var sqlite3 = require("..");
2
+ var assert = require("assert");
3
+
4
+ describe("pathname search support", function () {
5
+ let db;
6
+ describe("without search paths", () => {
7
+ before((done) => {
8
+ db = new sqlite3.Database(":memory:", done);
9
+ });
10
+
11
+ it("supports a full path", function (done) {
12
+ db.prepare('select * from "test/support/prepare.csv"').all(
13
+ (err, result) => {
14
+ assert(err === null);
15
+ assert(result.length === 5000);
16
+ done();
17
+ }
18
+ );
19
+ });
20
+
21
+ it("does not support a partial path", function (done) {
22
+ db.prepare('select * from "prepare.csv"').all((err, result) => {
23
+ assert(err.code === "DUCKDB_NODEJS_ERROR");
24
+ assert(err.errno === -1);
25
+ assert(result == null);
26
+ done();
27
+ });
28
+ });
29
+ });
30
+
31
+ describe("with search paths", () => {
32
+ before((done) => {
33
+ db = new sqlite3.Database(":memory:", () => {
34
+ db.prepare("SET FILE_SEARCH_PATH='test/support'").run(done);
35
+ });
36
+ });
37
+
38
+ it("supports a full path", function (done) {
39
+ db.prepare('select * from "test/support/prepare.csv"').all(
40
+ (err, result) => {
41
+ assert(err === null);
42
+ assert(result.length === 5000);
43
+ done();
44
+ }
45
+ );
46
+ });
47
+
48
+ it("supports a partial path", function (done) {
49
+ db.prepare('select * from "prepare.csv"').all((err, result) => {
50
+ assert(err === null);
51
+ assert(result.length === 5000);
52
+ done();
53
+ });
54
+ });
55
+ });
56
+
57
+ describe("with multiple search paths", () => {
58
+ before((done) => {
59
+ db = new sqlite3.Database(":memory:", () => {
60
+ db.prepare("SET FILE_SEARCH_PATH='test/support'").run(done);
61
+ });
62
+ });
63
+
64
+ it("supports a full path", function (done) {
65
+ db.prepare('select * from "test/support/prepare.csv"').all(
66
+ (err, result) => {
67
+ assert(err === null);
68
+ assert(result.length === 5000);
69
+ done();
70
+ }
71
+ );
72
+ });
73
+
74
+ it("supports a partial path", function (done) {
75
+ db.prepare('select * from "prepare.csv"').all((err, result) => {
76
+ assert(err === null);
77
+ assert(result.length === 5000);
78
+ done();
79
+ });
80
+ });
81
+ });
82
+ });