duckdb 0.4.1-dev362.0 → 0.4.1-dev389.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/duckdb.cpp +9944 -9571
- package/src/duckdb.hpp +96 -24
- package/src/parquet-amalgamation.cpp +33418 -33375
- package/src/parquet-amalgamation.hpp +7 -35
|
@@ -7853,7 +7853,7 @@ namespace duckdb {
|
|
|
7853
7853
|
class BufferedSerializer;
|
|
7854
7854
|
class ParquetWriter;
|
|
7855
7855
|
class ColumnWriterPageState;
|
|
7856
|
-
class StandardColumnWriterState;
|
|
7856
|
+
class BasicColumnWriterState;
|
|
7857
7857
|
|
|
7858
7858
|
class ColumnWriterState {
|
|
7859
7859
|
public:
|
|
@@ -7875,9 +7875,6 @@ public:
|
|
|
7875
7875
|
};
|
|
7876
7876
|
|
|
7877
7877
|
class ColumnWriter {
|
|
7878
|
-
//! We limit the uncompressed page size to 100MB
|
|
7879
|
-
// The max size in Parquet is 2GB, but we choose a more conservative limit
|
|
7880
|
-
static constexpr const idx_t MAX_UNCOMPRESSED_PAGE_SIZE = 100000000;
|
|
7881
7878
|
|
|
7882
7879
|
public:
|
|
7883
7880
|
ColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path, idx_t max_repeat,
|
|
@@ -7901,46 +7898,20 @@ public:
|
|
|
7901
7898
|
idx_t max_repeat = 0, idx_t max_define = 1,
|
|
7902
7899
|
bool can_have_nulls = true);
|
|
7903
7900
|
|
|
7904
|
-
virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group);
|
|
7905
|
-
virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count);
|
|
7901
|
+
virtual unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) = 0;
|
|
7902
|
+
virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) = 0;
|
|
7906
7903
|
|
|
7907
|
-
virtual void BeginWrite(ColumnWriterState &state);
|
|
7908
|
-
virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count);
|
|
7909
|
-
virtual void FinalizeWrite(ColumnWriterState &state);
|
|
7904
|
+
virtual void BeginWrite(ColumnWriterState &state) = 0;
|
|
7905
|
+
virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count) = 0;
|
|
7906
|
+
virtual void FinalizeWrite(ColumnWriterState &state) = 0;
|
|
7910
7907
|
|
|
7911
7908
|
protected:
|
|
7912
7909
|
void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, idx_t count,
|
|
7913
7910
|
uint16_t define_value, uint16_t null_value);
|
|
7914
7911
|
void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat);
|
|
7915
7912
|
|
|
7916
|
-
void WriteLevels(Serializer &temp_writer, const vector<uint16_t> &levels, idx_t max_value, idx_t start_offset,
|
|
7917
|
-
idx_t count);
|
|
7918
|
-
|
|
7919
|
-
virtual duckdb_parquet::format::Encoding::type GetEncoding();
|
|
7920
|
-
|
|
7921
|
-
void NextPage(ColumnWriterState &state_p);
|
|
7922
|
-
void FlushPage(ColumnWriterState &state_p);
|
|
7923
|
-
void WriteDictionary(ColumnWriterState &state_p, unique_ptr<BufferedSerializer> temp_writer, idx_t row_count);
|
|
7924
|
-
|
|
7925
|
-
virtual void FlushDictionary(ColumnWriterState &state, ColumnWriterStatistics *stats);
|
|
7926
|
-
|
|
7927
|
-
//! Initializes the state used to track statistics during writing. Only used for scalar types.
|
|
7928
|
-
virtual unique_ptr<ColumnWriterStatistics> InitializeStatsState();
|
|
7929
|
-
//! Retrieves the row size of a vector at the specified location. Only used for scalar types.
|
|
7930
|
-
virtual idx_t GetRowSize(Vector &vector, idx_t index);
|
|
7931
|
-
//! Writes a (subset of a) vector to the specified serializer. Only used for scalar types.
|
|
7932
|
-
virtual void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state,
|
|
7933
|
-
Vector &vector, idx_t chunk_start, idx_t chunk_end);
|
|
7934
|
-
|
|
7935
|
-
//! Initialize the writer for a specific page. Only used for scalar types.
|
|
7936
|
-
virtual unique_ptr<ColumnWriterPageState> InitializePageState();
|
|
7937
|
-
//! Flushes the writer for a specific page. Only used for scalar types.
|
|
7938
|
-
virtual void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state);
|
|
7939
|
-
|
|
7940
7913
|
void CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
|
|
7941
7914
|
unique_ptr<data_t[]> &compressed_buf);
|
|
7942
|
-
|
|
7943
|
-
void SetParquetStatistics(StandardColumnWriterState &state, duckdb_parquet::format::ColumnChunk &column);
|
|
7944
7915
|
};
|
|
7945
7916
|
|
|
7946
7917
|
} // namespace duckdb
|
|
@@ -7953,6 +7924,7 @@ class FileOpener;
|
|
|
7953
7924
|
|
|
7954
7925
|
class ParquetWriter {
|
|
7955
7926
|
friend class ColumnWriter;
|
|
7927
|
+
friend class BasicColumnWriter;
|
|
7956
7928
|
friend class ListColumnWriter;
|
|
7957
7929
|
friend class StructColumnWriter;
|
|
7958
7930
|
|