duckdb 0.4.1-dev362.0 → 0.4.1-dev389.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7853,7 +7853,7 @@ namespace duckdb {
7853
7853
  class BufferedSerializer;
7854
7854
  class ParquetWriter;
7855
7855
  class ColumnWriterPageState;
7856
- class StandardColumnWriterState;
7856
+ class BasicColumnWriterState;
7857
7857
 
7858
7858
  class ColumnWriterState {
7859
7859
  public:
@@ -7875,9 +7875,6 @@ public:
7875
7875
  };
7876
7876
 
7877
7877
  class ColumnWriter {
7878
- //! We limit the uncompressed page size to 100MB
7879
- // The max size in Parquet is 2GB, but we choose a more conservative limit
7880
- static constexpr const idx_t MAX_UNCOMPRESSED_PAGE_SIZE = 100000000;
7881
7878
 
7882
7879
  public:
7883
7880
  ColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path, idx_t max_repeat,
@@ -7901,46 +7898,20 @@ public:
7901
7898
  idx_t max_repeat = 0, idx_t max_define = 1,
7902
7899
  bool can_have_nulls = true);
7903
7900
 
7904
- virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group);
7905
- virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count);
7901
+ virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) = 0;
7902
+ virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) = 0;
7906
7903
 
7907
- virtual void BeginWrite(ColumnWriterState &state);
7908
- virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count);
7909
- virtual void FinalizeWrite(ColumnWriterState &state);
7904
+ virtual void BeginWrite(ColumnWriterState &state) = 0;
7905
+ virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count) = 0;
7906
+ virtual void FinalizeWrite(ColumnWriterState &state) = 0;
7910
7907
 
7911
7908
  protected:
7912
7909
  void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, idx_t count,
7913
7910
  uint16_t define_value, uint16_t null_value);
7914
7911
  void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat);
7915
7912
 
7916
- void WriteLevels(Serializer &temp_writer, const vector<uint16_t> &levels, idx_t max_value, idx_t start_offset,
7917
- idx_t count);
7918
-
7919
- virtual duckdb_parquet::format::Encoding::type GetEncoding();
7920
-
7921
- void NextPage(ColumnWriterState &state_p);
7922
- void FlushPage(ColumnWriterState &state_p);
7923
- void WriteDictionary(ColumnWriterState &state_p, unique_ptr<BufferedSerializer> temp_writer, idx_t row_count);
7924
-
7925
- virtual void FlushDictionary(ColumnWriterState &state, ColumnWriterStatistics *stats);
7926
-
7927
- //! Initializes the state used to track statistics during writing. Only used for scalar types.
7928
- virtual unique_ptr<ColumnWriterStatistics> InitializeStatsState();
7929
- //! Retrieves the row size of a vector at the specified location. Only used for scalar types.
7930
- virtual idx_t GetRowSize(Vector &vector, idx_t index);
7931
- //! Writes a (subset of a) vector to the specified serializer. Only used for scalar types.
7932
- virtual void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state,
7933
- Vector &vector, idx_t chunk_start, idx_t chunk_end);
7934
-
7935
- //! Initialize the writer for a specific page. Only used for scalar types.
7936
- virtual unique_ptr<ColumnWriterPageState> InitializePageState();
7937
- //! Flushes the writer for a specific page. Only used for scalar types.
7938
- virtual void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state);
7939
-
7940
7913
  void CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
7941
7914
  unique_ptr<data_t[]> &compressed_buf);
7942
-
7943
- void SetParquetStatistics(StandardColumnWriterState &state, duckdb_parquet::format::ColumnChunk &column);
7944
7915
  };
7945
7916
 
7946
7917
  } // namespace duckdb
@@ -7953,6 +7924,7 @@ class FileOpener;
7953
7924
 
7954
7925
  class ParquetWriter {
7955
7926
  friend class ColumnWriter;
7927
+ friend class BasicColumnWriter;
7956
7928
  friend class ListColumnWriter;
7957
7929
  friend class StructColumnWriter;
7958
7930