duckdb 0.6.2-dev758.0 → 0.6.2-dev766.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
   "name": "duckdb",
   "main": "./lib/duckdb.js",
   "types": "./lib/duckdb.d.ts",
-  "version": "0.6.2-dev758.0",
+  "version": "0.6.2-dev766.0",
   "description": "DuckDB node.js API",
   "gypfile": true,
   "dependencies": {
@@ -185,7 +185,7 @@ ColumnWriterState::~ColumnWriterState() {
 
 void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data,
                                 unique_ptr<data_t[]> &compressed_buf) {
-	switch (writer.codec) {
+	switch (writer.GetCodec()) {
 	case CompressionCodec::UNCOMPRESSED:
 		compressed_size = temp_writer.blob.size;
 		compressed_data = temp_writer.blob.data.get();
@@ -396,10 +396,10 @@ unique_ptr<ColumnWriterState> BasicColumnWriter::InitializeWriteState(duckdb_par
 void BasicColumnWriter::RegisterToRowGroup(duckdb_parquet::format::RowGroup &row_group) {
 	format::ColumnChunk column_chunk;
 	column_chunk.__isset.meta_data = true;
-	column_chunk.meta_data.codec = writer.codec;
+	column_chunk.meta_data.codec = writer.GetCodec();
 	column_chunk.meta_data.path_in_schema = schema_path;
 	column_chunk.meta_data.num_values = 0;
-	column_chunk.meta_data.type = writer.file_meta_data.schema[schema_idx].type;
+	column_chunk.meta_data.type = writer.GetType(schema_idx);
 	row_group.columns.push_back(move(column_chunk));
 }
 
@@ -645,7 +645,8 @@ void BasicColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
 	// flush the last page (if any remains)
 	FlushPage(state);
 
-	auto start_offset = writer.writer->GetTotalWritten();
+	auto &column_writer = writer.GetWriter();
+	auto start_offset = column_writer.GetTotalWritten();
 	auto page_offset = start_offset;
 	// flush the dictionary
 	if (HasDictionary(state)) {
@@ -665,14 +666,14 @@ void BasicColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
 	idx_t total_uncompressed_size = 0;
 	for (auto &write_info : state.write_info) {
 		D_ASSERT(write_info.page_header.uncompressed_page_size > 0);
-		auto header_start_offset = writer.writer->GetTotalWritten();
-		write_info.page_header.write(writer.protocol.get());
+		auto header_start_offset = column_writer.GetTotalWritten();
+		write_info.page_header.write(writer.GetProtocol());
 		// total uncompressed size in the column chunk includes the header size (!)
-		total_uncompressed_size += writer.writer->GetTotalWritten() - header_start_offset;
+		total_uncompressed_size += column_writer.GetTotalWritten() - header_start_offset;
 		total_uncompressed_size += write_info.page_header.uncompressed_page_size;
-		writer.writer->WriteData(write_info.compressed_data, write_info.compressed_size);
+		column_writer.WriteData(write_info.compressed_data, write_info.compressed_size);
 	}
-	column_chunk.meta_data.total_compressed_size = writer.writer->GetTotalWritten() - start_offset;
+	column_chunk.meta_data.total_compressed_size = column_writer.GetTotalWritten() - start_offset;
 	column_chunk.meta_data.total_uncompressed_size = total_uncompressed_size;
 }
 
@@ -26,11 +26,6 @@ class FileSystem;
 class FileOpener;
 
 class ParquetWriter {
-	friend class ColumnWriter;
-	friend class BasicColumnWriter;
-	friend class ListColumnWriter;
-	friend class StructColumnWriter;
-
 public:
 	ParquetWriter(FileSystem &fs, string file_name, FileOpener *file_opener, vector<LogicalType> types,
 	              vector<string> names, duckdb_parquet::format::CompressionCodec::type codec);
@@ -42,6 +37,19 @@ public:
 	static duckdb_parquet::format::Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type);
 	static void SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::format::SchemaElement &schema_ele);
 
+	duckdb_apache::thrift::protocol::TProtocol *GetProtocol() {
+		return protocol.get();
+	}
+	duckdb_parquet::format::CompressionCodec::type GetCodec() {
+		return codec;
+	}
+	duckdb_parquet::format::Type::type GetType(idx_t schema_idx) {
+		return file_meta_data.schema[schema_idx].type;
+	}
+	BufferedFileWriter &GetWriter() {
+		return *writer;
+	}
+
 private:
 	string file_name;
 	vector<LogicalType> sql_types;
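
The two hunks above replace the four friend declarations with a narrow accessor surface, so the column writers no longer depend on ParquetWriter's private layout. A minimal sketch of the pattern (class and member names here are illustrative, not the real DuckDB declarations):

#include <cstdint>

class FileWriter {
public:
	explicit FileWriter(int32_t codec_p) : codec(codec_p) {
	}
	// A small read-only getter replaces "friend class ColumnHelper;".
	int32_t GetCodec() const {
		return codec;
	}

private:
	int32_t codec; // private state stays private
};

class ColumnHelper {
public:
	// Compiles without friendship: only the public accessor is used,
	// so FileWriter's internals can change without touching this class.
	static int32_t CodecOf(const FileWriter &writer) {
		return writer.GetCodec();
	}
};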
@@ -733,6 +733,17 @@ unique_ptr<LocalFunctionData> ParquetWriteInitializeLocal(ExecutionContext &cont
 	return make_unique<ParquetWriteLocalState>(context.client, bind_data.sql_types);
 }
 
+//===--------------------------------------------------------------------===//
+// Parallel
+//===--------------------------------------------------------------------===//
+bool ParquetWriteIsParallel(ClientContext &context, FunctionData &bind_data) {
+	auto &config = DBConfig::GetConfig(context);
+	if (config.options.preserve_insertion_order) {
+		return false;
+	}
+	return true;
+}
+
 unique_ptr<TableFunctionRef> ParquetScanReplacement(ClientContext &context, const string &table_name,
                                                     ReplacementScanData *data) {
 	auto lower_name = StringUtil::Lower(table_name);
@@ -769,6 +780,7 @@ void ParquetExtension::Load(DuckDB &db) {
 	function.copy_to_sink = ParquetWriteSink;
 	function.copy_to_combine = ParquetWriteCombine;
 	function.copy_to_finalize = ParquetWriteFinalize;
+	function.parallel = ParquetWriteIsParallel;
 	function.copy_from_bind = ParquetScanFunction::ParquetReadBind;
 	function.copy_from_function = scan_fun.functions[0];
 
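With ParquetWriteIsParallel registered, a COPY TO Parquet can use a parallel sink whenever insertion order does not need to be preserved. A usage sketch against the C++ API (the table name and output path are placeholders):

#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr); // in-memory database
	duckdb::Connection con(db);

	con.Query("CREATE TABLE tbl AS SELECT range AS i FROM range(1000000)");
	// ParquetWriteIsParallel returns false while preserve_insertion_order
	// is enabled (the default), so disable it to opt in to parallel writes.
	con.Query("SET preserve_insertion_order=false");
	con.Query("COPY tbl TO 'out.parquet' (FORMAT PARQUET)");
	return 0;
}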
@@ -249,18 +249,17 @@ void ParquetWriter::Flush(ColumnDataCollection &buffer) {
 	if (buffer.Count() == 0) {
 		return;
 	}
-	lock_guard<mutex> glock(lock);
 
 	// set up a new row group for this chunk collection
 	ParquetRowGroup row_group;
 	row_group.num_rows = buffer.Count();
-	row_group.file_offset = writer->GetTotalWritten();
 	row_group.__isset.file_offset = true;
 
+	vector<unique_ptr<ColumnWriterState>> states;
 	// iterate over each of the columns of the chunk collection and write them
 	D_ASSERT(buffer.ColumnCount() == column_writers.size());
 	for (idx_t col_idx = 0; col_idx < buffer.ColumnCount(); col_idx++) {
-		const unique_ptr<ColumnWriter> &col_writer = column_writers[col_idx];
+		const auto &col_writer = column_writers[col_idx];
 		auto write_state = col_writer->InitializeWriteState(row_group, buffer.GetAllocator());
 		if (col_writer->HasAnalyze()) {
 			for (auto &chunk : buffer.Chunks()) {
@@ -275,6 +274,14 @@ void ParquetWriter::Flush(ColumnDataCollection &buffer) {
 		for (auto &chunk : buffer.Chunks()) {
 			col_writer->Write(*write_state, chunk.data[col_idx], chunk.size());
 		}
+		states.push_back(move(write_state));
+	}
+
+	lock_guard<mutex> glock(lock);
+	row_group.file_offset = writer->GetTotalWritten();
+	for (idx_t col_idx = 0; col_idx < buffer.ColumnCount(); col_idx++) {
+		const auto &col_writer = column_writers[col_idx];
+		auto write_state = move(states[col_idx]);
 		col_writer->FinalizeWrite(*write_state);
 	}
 
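The Flush refactor above shrinks the critical section: per-column encoding now happens outside the lock, and the mutex is held only while finished pages are appended to the shared file, which keeps each row group contiguous. A generic sketch of the two-phase idea (std types stand in for the DuckDB classes):

#include <mutex>
#include <string>
#include <vector>

struct SharedFile {
	std::mutex lock;
	std::string bytes; // stands in for BufferedFileWriter
};

void FlushRowGroup(SharedFile &file, const std::vector<std::string> &columns) {
	// Phase 1: expensive per-column work runs unlocked; results are
	// buffered locally (the diff keeps them in ColumnWriterState objects).
	std::vector<std::string> encoded;
	for (auto &col : columns) {
		encoded.push_back(col + "|encoded"); // placeholder for encode + compress
	}
	// Phase 2: take the lock only to append to the shared file, so pages
	// from concurrent flushes never interleave within a row group.
	std::lock_guard<std::mutex> guard(file.lock);
	for (auto &page : encoded) {
		file.bytes += page;
	}
}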
@@ -40,7 +40,7 @@ void MoveTmpFile(ClientContext &context, const string &tmp_file_path) {
 PhysicalCopyToFile::PhysicalCopyToFile(vector<LogicalType> types, CopyFunction function_p,
                                        unique_ptr<FunctionData> bind_data, idx_t estimated_cardinality)
     : PhysicalOperator(PhysicalOperatorType::COPY_TO_FILE, move(types), estimated_cardinality),
-      function(move(function_p)), bind_data(move(bind_data)) {
+      function(move(function_p)), bind_data(move(bind_data)), parallel(false) {
 }
 
 SinkResultType PhysicalCopyToFile::Sink(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate,
@@ -18,6 +18,9 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile
 	copy->file_path = op.file_path;
 	copy->use_tmp_file = use_tmp_file;
 	copy->per_thread_output = op.per_thread_output;
+	if (op.function.parallel) {
+		copy->parallel = op.function.parallel(context, *copy->bind_data);
+	}
 
 	copy->children.push_back(move(plan));
 	return move(copy);
@@ -8,6 +8,7 @@
 #include "duckdb/common/types/string_type.hpp"
 #include "duckdb/common/vector_operations/vector_operations.hpp"
 #include "duckdb/function/scalar/string_functions.hpp"
+#include "duckdb/main/config.hpp"
 #include <limits>
 
 namespace duckdb {
@@ -390,6 +391,17 @@ void WriteCSVFinalize(ClientContext &context, FunctionData &bind_data, GlobalFun
 	global_state.handle.reset();
 }
 
+//===--------------------------------------------------------------------===//
+// Parallel
+//===--------------------------------------------------------------------===//
+bool WriteCSVIsParallel(ClientContext &context, FunctionData &bind_data) {
+	auto &config = DBConfig::GetConfig(context);
+	if (config.options.preserve_insertion_order) {
+		return false;
+	}
+	return true;
+}
+
 void CSVCopyFunction::RegisterFunction(BuiltinFunctions &set) {
 	CopyFunction info("csv");
 	info.copy_to_bind = WriteCSVBind;
@@ -398,6 +410,7 @@ void CSVCopyFunction::RegisterFunction(BuiltinFunctions &set) {
 	info.copy_to_sink = WriteCSVSink;
 	info.copy_to_combine = WriteCSVCombine;
 	info.copy_to_finalize = WriteCSVFinalize;
+	info.parallel = WriteCSVIsParallel;
 
 	info.copy_from_bind = ReadCSVBind;
 	info.copy_from_function = ReadCSVTableFunction::GetFunction();
@@ -1,8 +1,8 @@
 #ifndef DUCKDB_VERSION
-#define DUCKDB_VERSION "0.6.2-dev758"
+#define DUCKDB_VERSION "0.6.2-dev766"
 #endif
 #ifndef DUCKDB_SOURCE_ID
-#define DUCKDB_SOURCE_ID "cd29769dcd"
+#define DUCKDB_SOURCE_ID "81c7b8cfb6"
 #endif
 #include "duckdb/function/table/system_functions.hpp"
 #include "duckdb/main/database.hpp"
@@ -24,6 +24,7 @@ public:
 	unique_ptr<FunctionData> bind_data;
 	string file_path;
 	bool use_tmp_file;
+	bool parallel;
 	bool per_thread_output;
 
 public:
@@ -51,7 +52,7 @@ public:
 	}
 
 	bool ParallelSink() const override {
-		return per_thread_output;
+		return per_thread_output || parallel;
 	}
 };
 } // namespace duckdb
@@ -44,13 +44,14 @@ typedef unique_ptr<FunctionData> (*copy_to_deserialize_t)(ClientContext &context
 typedef unique_ptr<FunctionData> (*copy_from_bind_t)(ClientContext &context, CopyInfo &info,
                                                      vector<string> &expected_names,
                                                      vector<LogicalType> &expected_types);
+typedef bool (*copy_to_is_parallel_t)(ClientContext &context, FunctionData &bind_data);
 
 class CopyFunction : public Function {
 public:
 	explicit CopyFunction(string name)
 	    : Function(name), copy_to_bind(nullptr), copy_to_initialize_local(nullptr), copy_to_initialize_global(nullptr),
-	      copy_to_sink(nullptr), copy_to_combine(nullptr), copy_to_finalize(nullptr), serialize(nullptr),
-	      deserialize(nullptr), copy_from_bind(nullptr) {
+	      copy_to_sink(nullptr), copy_to_combine(nullptr), copy_to_finalize(nullptr), parallel(nullptr),
+	      serialize(nullptr), deserialize(nullptr), copy_from_bind(nullptr) {
 	}
 
 	copy_to_bind_t copy_to_bind;
@@ -59,6 +60,7 @@ public:
 	copy_to_sink_t copy_to_sink;
 	copy_to_combine_t copy_to_combine;
 	copy_to_finalize_t copy_to_finalize;
+	copy_to_is_parallel_t parallel;
 
 	copy_to_serialize_t serialize;
 	copy_to_deserialize_t deserialize;
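
Any copy function can opt in to parallel sinks by assigning the new callback, exactly as the CSV and Parquet writers do in this diff. A hypothetical registration, assuming the duckdb namespace and headers above (the "json" format and WriteJSONIsParallel are invented for illustration):

bool WriteJSONIsParallel(ClientContext &context, FunctionData &bind_data) {
	auto &config = DBConfig::GetConfig(context);
	// Mirror the CSV/Parquet rule: order-preserving writes stay serial.
	return !config.options.preserve_insertion_order;
}

void RegisterJSONCopy(BuiltinFunctions &set) {
	CopyFunction info("json");
	// When this stays nullptr, the planner never marks the sink parallel.
	info.parallel = WriteJSONIsParallel;
	set.AddFunction(info);
}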
@@ -28,6 +28,8 @@ struct FileHandle;
 //! The version number of the database storage format
 extern const uint64_t VERSION_NUMBER;
 
+const char *GetDuckDBVersion(idx_t version_number);
+
 using block_id_t = int64_t;
 
 #define INVALID_BLOCK (-1)
@@ -46,6 +46,29 @@ MainHeader MainHeader::Deserialize(Deserializer &source) {
 		throw IOException("The file is not a valid DuckDB database file!");
 	}
 	header.version_number = source.Read<uint64_t>();
+	// check the version number
+	if (header.version_number != VERSION_NUMBER) {
+		auto version = GetDuckDBVersion(header.version_number);
+		string version_text;
+		if (version) {
+			// known version
+			version_text = "DuckDB version " + string(version);
+		} else {
+			version_text = string("an ") + (VERSION_NUMBER > header.version_number ? "older development" : "newer") +
+			               string(" version of DuckDB");
+		}
+		throw IOException(
+		    "Trying to read a database file with version number %lld, but we can only read version %lld.\n"
+		    "The database file was created with %s.\n\n"
+		    "The storage of DuckDB is not yet stable; newer versions of DuckDB cannot read old database files and "
+		    "vice versa.\n"
+		    "The storage will be stabilized when version 1.0 releases.\n\n"
+		    "For now, we recommend that you load the database file in a supported version of DuckDB, and use the "
+		    "EXPORT DATABASE command "
+		    "followed by IMPORT DATABASE on the current version of DuckDB.\n\n"
+		    "See the storage page for more information: https://duckdb.org/internals/storage",
+		    header.version_number, VERSION_NUMBER, version_text);
+	}
 	// read the flags
 	FieldReader reader(source);
 	for (idx_t i = 0; i < FLAG_COUNT; i++) {
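
With the lookup in place, opening (for example) a v0.6.1 file (storage version 39) with this build (storage version 40) should produce a message along these lines:

Trying to read a database file with version number 39, but we can only read version 40.
The database file was created with DuckDB version v0.6.0 or v0.6.1.
...
See the storage page for more information: https://duckdb.org/internals/storage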
@@ -150,20 +173,7 @@ SingleFileBlockManager::SingleFileBlockManager(DatabaseInstance &db, string path
 	MainHeader::CheckMagicBytes(*handle);
 	// otherwise, we check the metadata of the file
 	header_buffer.ReadAndChecksum(*handle, 0);
-	MainHeader header = DeserializeHeaderStructure<MainHeader>(header_buffer.buffer);
-	// check the version number
-	if (header.version_number != VERSION_NUMBER) {
-		throw IOException(
-		    "Trying to read a database file with version number %lld, but we can only read version %lld.\n"
-		    "The database file was created with an %s version of DuckDB.\n\n"
-		    "The storage of DuckDB is not yet stable; newer versions of DuckDB cannot read old database files and "
-		    "vice versa.\n"
-		    "The storage will be stabilized when version 1.0 releases.\n\n"
-		    "For now, we recommend that you load the database file in a supported version of DuckDB, and use the "
-		    "EXPORT DATABASE command "
-		    "followed by IMPORT DATABASE on the current version of DuckDB.",
-		    header.version_number, VERSION_NUMBER, VERSION_NUMBER > header.version_number ? "older" : "newer");
-	}
+	DeserializeHeaderStructure<MainHeader>(header_buffer.buffer);
 
 	// read the database headers from disk
 	DatabaseHeader h1, h2;
@@ -4,4 +4,35 @@ namespace duckdb {
 
 const uint64_t VERSION_NUMBER = 40;
 
+struct StorageVersionInfo {
+	const char *version_name;
+	idx_t storage_version;
+};
+
+static StorageVersionInfo storage_version_info[] = {{"v0.6.0 or v0.6.1", 39},
+                                                    {"v0.5.0 or v0.5.1", 38},
+                                                    {"v0.3.3, v0.3.4 or v0.4.0", 33},
+                                                    {"v0.3.2", 31},
+                                                    {"v0.3.1", 27},
+                                                    {"v0.3.0", 25},
+                                                    {"v0.2.9", 21},
+                                                    {"v0.2.8", 18},
+                                                    {"v0.2.7", 17},
+                                                    {"v0.2.6", 15},
+                                                    {"v0.2.5", 13},
+                                                    {"v0.2.4", 11},
+                                                    {"v0.2.3", 6},
+                                                    {"v0.2.2", 4},
+                                                    {"v0.2.1 and prior", 1},
+                                                    {nullptr, 0}};
+
+const char *GetDuckDBVersion(idx_t version_number) {
+	for (idx_t i = 0; storage_version_info[i].version_name; i++) {
+		if (version_number == storage_version_info[i].storage_version) {
+			return storage_version_info[i].version_name;
+		}
+	}
+	return nullptr;
+}
+
 } // namespace duckdb
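
The table is a sentinel-terminated map from historical storage versions to the releases that wrote them. A small usage sketch of how MainHeader::Deserialize above consumes it (illustrative, assuming the declarations above):

void CheckLookup() {
	// Known storage version: the error message can name the exact releases.
	const char *known = GetDuckDBVersion(39); // "v0.6.0 or v0.6.1"
	// Unknown version: nullptr makes the caller fall back to the generic
	// "older development" / "newer" version wording.
	const char *unknown = GetDuckDBVersion(999); // nullptr
	(void)known;
	(void)unknown;
}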