duckdb 0.4.1-dev182.0 → 0.4.1-dev1896.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -34,6 +34,10 @@ public:
34
34
 
35
35
  #include "duckdb.hpp"
36
36
  #ifndef DUCKDB_AMALGAMATION
37
+ #include "duckdb/planner/table_filter.hpp"
38
+ #include "duckdb/planner/filter/constant_filter.hpp"
39
+ #include "duckdb/planner/filter/null_filter.hpp"
40
+ #include "duckdb/planner/filter/conjunction_filter.hpp"
37
41
  #include "duckdb/common/common.hpp"
38
42
  #include "duckdb/common/exception.hpp"
39
43
  #include "duckdb/common/string_util.hpp"
@@ -91,8 +95,8 @@ public:
91
95
  * under the License.
92
96
  */
93
97
 
94
- #ifndef _THRIFT_THRIFT_H_
95
- #define _THRIFT_THRIFT_H_ 1
98
+ #ifndef _DUCKDB_THRIFT_THRIFT_H_
99
+ #define _DUCKDB_THRIFT_THRIFT_H_ 1
96
100
 
97
101
 
98
102
 
@@ -121,8 +125,8 @@ public:
121
125
 
122
126
  // clang-format off
123
127
 
124
- #ifndef _THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
125
- # define _THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
128
+ #ifndef _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
129
+ # define _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
126
130
 
127
131
  #ifdef _WIN32
128
132
  #ifdef _WINSOCKAPI_
@@ -236,7 +240,7 @@ public:
236
240
  # define THRIFT_SHUT_RDWR SHUT_RDWR
237
241
  #endif
238
242
 
239
- #endif // _THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
243
+ #endif // _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_
240
244
 
241
245
 
242
246
  // LICENSE_CHANGE_END
@@ -329,8 +333,8 @@ public:
329
333
  * under the License.
330
334
  */
331
335
 
332
- #ifndef _THRIFT_TLOGGING_H_
333
- #define _THRIFT_TLOGGING_H_ 1
336
+ #ifndef _DUCKDB_THRIFT_TLOGGING_H_
337
+ #define _DUCKDB_THRIFT_TLOGGING_H_ 1
334
338
 
335
339
 
336
340
 
@@ -454,7 +458,7 @@ public:
454
458
  #define T_GENERIC_PROTOCOL(template_class, generic_prot, specific_prot)
455
459
  #endif
456
460
 
457
- #endif // #ifndef _THRIFT_TLOGGING_H_
461
+ #endif // #ifndef _DUCKDB_THRIFT_TLOGGING_H_
458
462
 
459
463
 
460
464
  // LICENSE_CHANGE_END
@@ -546,7 +550,7 @@ void profile_write_pprof(FILE* gen_calls_f, FILE* virtual_calls_f);
546
550
  }
547
551
  } // duckdb_apache::thrift
548
552
 
549
- #endif // #ifndef _THRIFT_THRIFT_H_
553
+ #endif // #ifndef _DUCKDB_THRIFT_THRIFT_H_
550
554
 
551
555
 
552
556
  // LICENSE_CHANGE_END
@@ -576,8 +580,8 @@ void profile_write_pprof(FILE* gen_calls_f, FILE* virtual_calls_f);
576
580
  * under the License.
577
581
  */
578
582
 
579
- #ifndef _THRIFT_TAPPLICATIONEXCEPTION_H_
580
- #define _THRIFT_TAPPLICATIONEXCEPTION_H_ 1
583
+ #ifndef _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_
584
+ #define _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_ 1
581
585
 
582
586
 
583
587
 
@@ -671,7 +675,7 @@ protected:
671
675
  }
672
676
  } // duckdb_apache::thrift
673
677
 
674
- #endif // #ifndef _THRIFT_TAPPLICATIONEXCEPTION_H_
678
+ #endif // #ifndef _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_
675
679
 
676
680
 
677
681
  // LICENSE_CHANGE_END
@@ -701,8 +705,8 @@ protected:
701
705
  * under the License.
702
706
  */
703
707
 
704
- #ifndef _THRIFT_TBASE_H_
705
- #define _THRIFT_TBASE_H_ 1
708
+ #ifndef _DUCKDB_THRIFT_TBASE_H_
709
+ #define _DUCKDB_THRIFT_TBASE_H_ 1
706
710
 
707
711
 
708
712
 
@@ -730,8 +734,8 @@ protected:
730
734
  * under the License.
731
735
  */
732
736
 
733
- #ifndef _THRIFT_PROTOCOL_TPROTOCOL_H_
734
- #define _THRIFT_PROTOCOL_TPROTOCOL_H_ 1
737
+ #ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_
738
+ #define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ 1
735
739
 
736
740
  #ifdef _WIN32
737
741
  // Need to come before any Windows.h includes
@@ -763,8 +767,8 @@ protected:
763
767
  * under the License.
764
768
  */
765
769
 
766
- #ifndef _THRIFT_TRANSPORT_TTRANSPORT_H_
767
- #define _THRIFT_TRANSPORT_TTRANSPORT_H_ 1
770
+ #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_
771
+ #define _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_ 1
768
772
 
769
773
 
770
774
 
@@ -792,8 +796,8 @@ protected:
792
796
  * under the License.
793
797
  */
794
798
 
795
- #ifndef _THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_
796
- #define _THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ 1
799
+ #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_
800
+ #define _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ 1
797
801
 
798
802
  // FUCK OFF #include <boost/numeric/conversion/cast.hpp>
799
803
  #include <string>
@@ -878,7 +882,7 @@ protected:
878
882
  }
879
883
  } // duckdb_apache::thrift::transport
880
884
 
881
- #endif // #ifndef _THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_
885
+ #endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_
882
886
 
883
887
 
884
888
  // LICENSE_CHANGE_END
@@ -1129,7 +1133,7 @@ public:
1129
1133
  }
1130
1134
  } // duckdb_apache::thrift::transport
1131
1135
 
1132
- #endif // #ifndef _THRIFT_TRANSPORT_TTRANSPORT_H_
1136
+ #endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_
1133
1137
 
1134
1138
 
1135
1139
  // LICENSE_CHANGE_END
@@ -1159,8 +1163,8 @@ public:
1159
1163
  * under the License.
1160
1164
  */
1161
1165
 
1162
- #ifndef _THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_
1163
- #define _THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ 1
1166
+ #ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_
1167
+ #define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ 1
1164
1168
 
1165
1169
  #include <string>
1166
1170
 
@@ -1244,7 +1248,7 @@ protected:
1244
1248
  }
1245
1249
  } // duckdb_apache::thrift::protocol
1246
1250
 
1247
- #endif // #ifndef _THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_
1251
+ #endif // #ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_
1248
1252
 
1249
1253
 
1250
1254
  // LICENSE_CHANGE_END
@@ -1267,37 +1271,39 @@ protected:
1267
1271
  // but that doesn't work.
1268
1272
  // For a pretty in-depth explanation of the problem, see
1269
1273
  // http://cellperformance.beyond3d.com/articles/2006/06/understanding-strict-aliasing.html
1274
+ namespace duckdb_apache { namespace thrift {
1270
1275
  template <typename To, typename From>
1271
1276
  static inline To bitwise_cast(From from) {
1272
- static_assert(sizeof(From) == sizeof(To), "sizeof(From) == sizeof(To)");
1273
-
1274
- // BAD!!! These are all broken with -O2.
1275
- //return *reinterpret_cast<To*>(&from); // BAD!!!
1276
- //return *static_cast<To*>(static_cast<void*>(&from)); // BAD!!!
1277
- //return *(To*)(void*)&from; // BAD!!!
1278
-
1279
- // Super clean and paritally blessed by section 3.9 of the standard.
1280
- //unsigned char c[sizeof(from)];
1281
- //memcpy(c, &from, sizeof(from));
1282
- //To to;
1283
- //memcpy(&to, c, sizeof(c));
1284
- //return to;
1285
-
1286
- // Slightly more questionable.
1287
- // Same code emitted by GCC.
1288
- //To to;
1289
- //memcpy(&to, &from, sizeof(from));
1290
- //return to;
1291
-
1292
- // Technically undefined, but almost universally supported,
1293
- // and the most efficient implementation.
1294
- union {
1295
- From f;
1296
- To t;
1297
- } u;
1298
- u.f = from;
1299
- return u.t;
1277
+ static_assert(sizeof(From) == sizeof(To), "sizeof(From) == sizeof(To)");
1278
+
1279
+ // BAD!!! These are all broken with -O2.
1280
+ // return *reinterpret_cast<To*>(&from); // BAD!!!
1281
+ // return *static_cast<To*>(static_cast<void*>(&from)); // BAD!!!
1282
+ // return *(To*)(void*)&from; // BAD!!!
1283
+
1284
+ // Super clean and paritally blessed by section 3.9 of the standard.
1285
+ // unsigned char c[sizeof(from)];
1286
+ // memcpy(c, &from, sizeof(from));
1287
+ // To to;
1288
+ // memcpy(&to, c, sizeof(c));
1289
+ // return to;
1290
+
1291
+ // Slightly more questionable.
1292
+ // Same code emitted by GCC.
1293
+ // To to;
1294
+ // memcpy(&to, &from, sizeof(from));
1295
+ // return to;
1296
+
1297
+ // Technically undefined, but almost universally supported,
1298
+ // and the most efficient implementation.
1299
+ union {
1300
+ From f;
1301
+ To t;
1302
+ } u;
1303
+ u.f = from;
1304
+ return u.t;
1300
1305
  }
1306
+ }} // namespace duckdb_apache::thrift
1301
1307
 
1302
1308
 
1303
1309
  #ifdef HAVE_SYS_PARAM_H
@@ -1983,7 +1989,7 @@ uint32_t skip(Protocol_& prot, TType type) {
1983
1989
 
1984
1990
  }}} // duckdb_apache::thrift::protocol
1985
1991
 
1986
- #endif // #define _THRIFT_PROTOCOL_TPROTOCOL_H_ 1
1992
+ #endif // #define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ 1
1987
1993
 
1988
1994
 
1989
1995
  // LICENSE_CHANGE_END
@@ -2001,7 +2007,7 @@ public:
2001
2007
  }
2002
2008
  } // duckdb_apache::thrift
2003
2009
 
2004
- #endif // #ifndef _THRIFT_TBASE_H_
2010
+ #endif // #ifndef _DUCKDB_THRIFT_TBASE_H_
2005
2011
 
2006
2012
 
2007
2013
  // LICENSE_CHANGE_END
@@ -4651,8 +4657,8 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
4651
4657
  * under the License.
4652
4658
  */
4653
4659
 
4654
- #ifndef _THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_
4655
- #define _THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_ 1
4660
+ #ifndef _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_
4661
+ #define _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_ 1
4656
4662
 
4657
4663
 
4658
4664
 
@@ -4679,8 +4685,8 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
4679
4685
  * under the License.
4680
4686
  */
4681
4687
 
4682
- #ifndef _THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_
4683
- #define _THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1
4688
+ #ifndef _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_
4689
+ #define _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1
4684
4690
 
4685
4691
 
4686
4692
 
@@ -5172,7 +5178,7 @@ protected:
5172
5178
  }
5173
5179
  } // duckdb_apache::thrift::protocol
5174
5180
 
5175
- #endif // #define _THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1
5181
+ #endif // #define _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1
5176
5182
 
5177
5183
 
5178
5184
  // LICENSE_CHANGE_END
@@ -5441,8 +5447,8 @@ typedef TCompactProtocolFactoryT<TTransport> TCompactProtocolFactory;
5441
5447
  * specific language governing permissions and limitations
5442
5448
  * under the License.
5443
5449
  */
5444
- #ifndef _THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_
5445
- #define _THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ 1
5450
+ #ifndef _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_
5451
+ #define _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ 1
5446
5452
 
5447
5453
  #include <limits>
5448
5454
 
@@ -6248,7 +6254,7 @@ TType TCompactProtocolT<Transport_>::getTType(int8_t type) {
6248
6254
 
6249
6255
  }}} // duckdb_apache::thrift::protocol
6250
6256
 
6251
- #endif // _THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_
6257
+ #endif // _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_
6252
6258
 
6253
6259
 
6254
6260
  // LICENSE_CHANGE_END
@@ -6284,8 +6290,8 @@ TType TCompactProtocolT<Transport_>::getTType(int8_t type) {
6284
6290
  * under the License.
6285
6291
  */
6286
6292
 
6287
- #ifndef _THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_
6288
- #define _THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ 1
6293
+ #ifndef _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_
6294
+ #define _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ 1
6289
6295
 
6290
6296
  #include <cstdlib>
6291
6297
  #include <cstddef>
@@ -6319,8 +6325,8 @@ TType TCompactProtocolT<Transport_>::getTType(int8_t type) {
6319
6325
  * under the License.
6320
6326
  */
6321
6327
 
6322
- #ifndef _THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_
6323
- #define _THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ 1
6328
+ #ifndef _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_
6329
+ #define _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ 1
6324
6330
 
6325
6331
 
6326
6332
 
@@ -6439,7 +6445,7 @@ protected:
6439
6445
  }
6440
6446
  } // duckdb_apache::thrift::transport
6441
6447
 
6442
- #endif // #ifndef _THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_
6448
+ #endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_
6443
6449
 
6444
6450
 
6445
6451
  // LICENSE_CHANGE_END
@@ -6902,7 +6908,7 @@ protected:
6902
6908
  }
6903
6909
  } // duckdb_apache::thrift::transport
6904
6910
 
6905
- #endif // #ifndef _THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_
6911
+ #endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_
6906
6912
 
6907
6913
 
6908
6914
  // LICENSE_CHANGE_END
@@ -6924,7 +6930,7 @@ struct ReadHead {
6924
6930
  uint64_t size;
6925
6931
 
6926
6932
  // Current info
6927
- unique_ptr<AllocatedData> data;
6933
+ AllocatedData data;
6928
6934
  bool data_isset = false;
6929
6935
 
6930
6936
  idx_t GetEnd() const {
@@ -7019,7 +7025,7 @@ struct ReadAheadBuffer {
7019
7025
  throw std::runtime_error("Prefetch registered requested for bytes outside file");
7020
7026
  }
7021
7027
 
7022
- handle.Read(read_head.data->get(), read_head.size, read_head.location);
7028
+ handle.Read(read_head.data.get(), read_head.size, read_head.location);
7023
7029
  read_head.data_isset = true;
7024
7030
  }
7025
7031
  }
@@ -7041,16 +7047,16 @@ public:
7041
7047
 
7042
7048
  if (!prefetch_buffer->data_isset) {
7043
7049
  prefetch_buffer->Allocate(allocator);
7044
- handle.Read(prefetch_buffer->data->get(), prefetch_buffer->size, prefetch_buffer->location);
7050
+ handle.Read(prefetch_buffer->data.get(), prefetch_buffer->size, prefetch_buffer->location);
7045
7051
  prefetch_buffer->data_isset = true;
7046
7052
  }
7047
- memcpy(buf, prefetch_buffer->data->get() + location - prefetch_buffer->location, len);
7053
+ memcpy(buf, prefetch_buffer->data.get() + location - prefetch_buffer->location, len);
7048
7054
  } else {
7049
7055
  if (prefetch_mode && len < PREFETCH_FALLBACK_BUFFERSIZE && len > 0) {
7050
7056
  Prefetch(location, MinValue<uint64_t>(PREFETCH_FALLBACK_BUFFERSIZE, handle.GetFileSize() - location));
7051
7057
  auto prefetch_buffer_fallback = ra_buffer.GetReadHead(location);
7052
7058
  D_ASSERT(location - prefetch_buffer_fallback->location + len <= prefetch_buffer_fallback->size);
7053
- memcpy(buf, prefetch_buffer_fallback->data->get() + location - prefetch_buffer_fallback->location, len);
7059
+ memcpy(buf, prefetch_buffer_fallback->data.get() + location - prefetch_buffer_fallback->location, len);
7054
7060
  } else {
7055
7061
  handle.Read(buf, len, location);
7056
7062
  }
@@ -7191,12 +7197,12 @@ public:
7191
7197
  if (new_size > alloc_len) {
7192
7198
  alloc_len = new_size;
7193
7199
  allocated_data = allocator.Allocate(alloc_len);
7194
- ptr = (char *)allocated_data->get();
7200
+ ptr = (char *)allocated_data.get();
7195
7201
  }
7196
7202
  }
7197
7203
 
7198
7204
  private:
7199
- unique_ptr<AllocatedData> allocated_data;
7205
+ AllocatedData allocated_data;
7200
7206
  idx_t alloc_len = 0;
7201
7207
  };
7202
7208
 
@@ -7699,7 +7705,6 @@ class FileMetaData;
7699
7705
  namespace duckdb {
7700
7706
  class Allocator;
7701
7707
  class ClientContext;
7702
- class ChunkCollection;
7703
7708
  class BaseStatistics;
7704
7709
  class TableFilterSet;
7705
7710
 
@@ -7734,6 +7739,12 @@ struct ParquetOptions {
7734
7739
  explicit ParquetOptions(ClientContext &context);
7735
7740
 
7736
7741
  bool binary_as_string = false;
7742
+ bool filename = false;
7743
+ bool hive_partitioning = false;
7744
+
7745
+ public:
7746
+ void Serialize(FieldWriter &writer) const;
7747
+ void Deserialize(FieldReader &reader);
7737
7748
  };
7738
7749
 
7739
7750
  class ParquetReader {
@@ -7830,7 +7841,7 @@ private:
7830
7841
  #include "duckdb/common/exception.hpp"
7831
7842
  #include "duckdb/common/mutex.hpp"
7832
7843
  #include "duckdb/common/serializer/buffered_file_writer.hpp"
7833
- #include "duckdb/common/types/chunk_collection.hpp"
7844
+ #include "duckdb/common/types/column_data_collection.hpp"
7834
7845
  #endif
7835
7846
 
7836
7847
 
@@ -7851,7 +7862,7 @@ namespace duckdb {
7851
7862
  class BufferedSerializer;
7852
7863
  class ParquetWriter;
7853
7864
  class ColumnWriterPageState;
7854
- class StandardColumnWriterState;
7865
+ class BasicColumnWriterState;
7855
7866
 
7856
7867
  class ColumnWriterState {
7857
7868
  public:
@@ -7873,9 +7884,6 @@ public:
7873
7884
  };
7874
7885
 
7875
7886
  class ColumnWriter {
7876
- //! We limit the uncompressed page size to 100MB
7877
- // The max size in Parquet is 2GB, but we choose a more conservative limit
7878
- static constexpr const idx_t MAX_UNCOMPRESSED_PAGE_SIZE = 100000000;
7879
7887
 
7880
7888
  public:
7881
7889
  ColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path, idx_t max_repeat,
@@ -7899,46 +7907,35 @@ public:
7899
7907
  idx_t max_repeat = 0, idx_t max_define = 1,
7900
7908
  bool can_have_nulls = true);
7901
7909
 
7902
- virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group);
7903
- virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count);
7910
+ virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) = 0;
7904
7911
 
7905
- virtual void BeginWrite(ColumnWriterState &state);
7906
- virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count);
7907
- virtual void FinalizeWrite(ColumnWriterState &state);
7908
-
7909
- protected:
7910
- void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, idx_t count,
7911
- uint16_t define_value, uint16_t null_value);
7912
- void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat);
7913
-
7914
- void WriteLevels(Serializer &temp_writer, const vector<uint16_t> &levels, idx_t max_value, idx_t start_offset,
7915
- idx_t count);
7912
+ //! indicates whether the write need to analyse the data before preparing it
7913
+ virtual bool HasAnalyze() {
7914
+ return false;
7915
+ }
7916
7916
 
7917
- virtual duckdb_parquet::format::Encoding::type GetEncoding();
7917
+ virtual void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) {
7918
+ throw NotImplementedException("Writer does not need analysis");
7919
+ }
7918
7920
 
7919
- void NextPage(ColumnWriterState &state_p);
7920
- void FlushPage(ColumnWriterState &state_p);
7921
- void WriteDictionary(ColumnWriterState &state_p, unique_ptr<BufferedSerializer> temp_writer, idx_t row_count);
7921
+ //! Called after all data has been passed to Analyze
7922
+ virtual void FinalizeAnalyze(ColumnWriterState &state) {
7923
+ throw NotImplementedException("Writer does not need analysis");
7924
+ }
7922
7925
 
7923
- virtual void FlushDictionary(ColumnWriterState &state, ColumnWriterStatistics *stats);
7926
+ virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) = 0;
7924
7927
 
7925
- //! Initializes the state used to track statistics during writing. Only used for scalar types.
7926
- virtual unique_ptr<ColumnWriterStatistics> InitializeStatsState();
7927
- //! Retrieves the row size of a vector at the specified location. Only used for scalar types.
7928
- virtual idx_t GetRowSize(Vector &vector, idx_t index);
7929
- //! Writes a (subset of a) vector to the specified serializer. Only used for scalar types.
7930
- virtual void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state,
7931
- Vector &vector, idx_t chunk_start, idx_t chunk_end);
7928
+ virtual void BeginWrite(ColumnWriterState &state) = 0;
7929
+ virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count) = 0;
7930
+ virtual void FinalizeWrite(ColumnWriterState &state) = 0;
7932
7931
 
7933
- //! Initialize the writer for a specific page. Only used for scalar types.
7934
- virtual unique_ptr<ColumnWriterPageState> InitializePageState();
7935
- //! Flushes the writer for a specific page. Only used for scalar types.
7936
- virtual void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state);
7932
+ protected:
7933
+ void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, idx_t count,
7934
+ uint16_t define_value, uint16_t null_value);
7935
+ void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat);
7937
7936
 
7938
7937
  void CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
7939
7938
  unique_ptr<data_t[]> &compressed_buf);
7940
-
7941
- void SetParquetStatistics(StandardColumnWriterState &state, duckdb_parquet::format::ColumnChunk &column);
7942
7939
  };
7943
7940
 
7944
7941
  } // namespace duckdb
@@ -7951,6 +7948,7 @@ class FileOpener;
7951
7948
 
7952
7949
  class ParquetWriter {
7953
7950
  friend class ColumnWriter;
7951
+ friend class BasicColumnWriter;
7954
7952
  friend class ListColumnWriter;
7955
7953
  friend class StructColumnWriter;
7956
7954
 
@@ -7959,7 +7957,7 @@ public:
7959
7957
  vector<string> names, duckdb_parquet::format::CompressionCodec::type codec);
7960
7958
 
7961
7959
  public:
7962
- void Flush(ChunkCollection &buffer);
7960
+ void Flush(ColumnDataCollection &buffer);
7963
7961
  void Finalize();
7964
7962
 
7965
7963
  static duckdb_parquet::format::Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type);