duckdb 0.7.2-dev3353.0 → 0.7.2-dev3441.0

This diff shows the changes between publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (62)
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/json/buffered_json_reader.cpp +2 -3
  3. package/src/duckdb/extension/json/include/json_functions.hpp +5 -1
  4. package/src/duckdb/extension/json/include/json_scan.hpp +1 -0
  5. package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
  6. package/src/duckdb/extension/json/json-extension.cpp +7 -3
  7. package/src/duckdb/extension/json/json_functions/copy_json.cpp +16 -5
  8. package/src/duckdb/extension/json/json_functions/json_create.cpp +220 -93
  9. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +2 -2
  10. package/src/duckdb/extension/json/json_functions/json_transform.cpp +283 -117
  11. package/src/duckdb/extension/json/json_functions/read_json.cpp +8 -6
  12. package/src/duckdb/extension/json/json_functions.cpp +17 -15
  13. package/src/duckdb/extension/json/json_scan.cpp +8 -4
  14. package/src/duckdb/extension/parquet/column_reader.cpp +6 -2
  15. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +1 -2
  16. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +2 -2
  17. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +1 -0
  18. package/src/duckdb/extension/parquet/include/thrift_tools.hpp +3 -5
  19. package/src/duckdb/extension/parquet/parquet-extension.cpp +2 -4
  20. package/src/duckdb/extension/parquet/parquet_reader.cpp +11 -22
  21. package/src/duckdb/extension/parquet/parquet_statistics.cpp +5 -0
  22. package/src/duckdb/extension/parquet/parquet_writer.cpp +4 -4
  23. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +2 -2
  24. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  25. package/src/duckdb/src/common/file_system.cpp +13 -20
  26. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +2 -2
  27. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +10 -7
  28. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +3 -0
  29. package/src/duckdb/src/execution/index/art/art.cpp +3 -1
  30. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +2 -1
  31. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +2 -2
  32. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +1 -1
  33. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -2
  34. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +4 -5
  35. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +1 -1
  36. package/src/duckdb/src/function/cast/cast_function_set.cpp +89 -25
  37. package/src/duckdb/src/function/pragma/pragma_queries.cpp +20 -15
  38. package/src/duckdb/src/function/table/copy_csv.cpp +4 -5
  39. package/src/duckdb/src/function/table/read_csv.cpp +6 -5
  40. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  41. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +0 -1
  42. package/src/duckdb/src/include/duckdb/common/file_system.hpp +7 -6
  43. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +118 -0
  44. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -2
  45. package/src/duckdb/src/include/duckdb/common/types/type_map.hpp +19 -1
  46. package/src/duckdb/src/include/duckdb/execution/operator/persistent/base_csv_reader.hpp +3 -2
  47. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_line_info.hpp +1 -0
  48. package/src/duckdb/src/include/duckdb/main/client_data.hpp +4 -0
  49. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +5 -5
  50. package/src/duckdb/src/include/duckdb/planner/binder.hpp +3 -2
  51. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +2 -0
  52. package/src/duckdb/src/main/client_context.cpp +1 -4
  53. package/src/duckdb/src/main/client_data.cpp +19 -0
  54. package/src/duckdb/src/main/database.cpp +4 -1
  55. package/src/duckdb/src/main/extension/extension_install.cpp +5 -6
  56. package/src/duckdb/src/main/extension/extension_load.cpp +11 -16
  57. package/src/duckdb/src/main/settings/settings.cpp +2 -3
  58. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +1 -1
  59. package/src/duckdb/src/optimizer/join_order/join_order_optimizer.cpp +25 -1
  60. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +32 -35
  61. package/src/duckdb/src/storage/table/row_group_collection.cpp +41 -25
  62. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +7998 -7955
@@ -6,8 +6,8 @@
 #include "duckdb/function/replacement_scan.hpp"
 #include "duckdb/parser/expression/constant_expression.hpp"
 #include "duckdb/parser/expression/function_expression.hpp"
-#include "duckdb/parser/tableref/table_function_ref.hpp"
 #include "duckdb/parser/parsed_data/create_pragma_function_info.hpp"
+#include "duckdb/parser/tableref/table_function_ref.hpp"
 
 namespace duckdb {
 
@@ -115,6 +115,14 @@ unique_ptr<FunctionLocalState> JSONFunctionLocalState::Init(ExpressionState &sta
     return make_uniq<JSONFunctionLocalState>(state.GetContext());
 }
 
+unique_ptr<FunctionLocalState> JSONFunctionLocalState::InitCastLocalState(CastLocalStateParameters &parameters) {
+    if (parameters.context) {
+        return make_uniq<JSONFunctionLocalState>(*parameters.context);
+    } else {
+        return make_uniq<JSONFunctionLocalState>(Allocator::DefaultAllocator());
+    }
+}
+
 JSONFunctionLocalState &JSONFunctionLocalState::ResetAndGet(ExpressionState &state) {
     auto &lstate = ExecuteFunctionState::GetFunctionState(state)->Cast<JSONFunctionLocalState>();
     lstate.json_allocator.Reset();
@@ -197,14 +205,6 @@ unique_ptr<TableRef> JSONFunctions::ReadJSONReplacement(ClientContext &context,
     return std::move(table_function);
 }
 
-static duckdb::unique_ptr<FunctionLocalState> InitJSONCastLocalState(CastLocalStateParameters &parameters) {
-    if (parameters.context) {
-        return make_uniq<JSONFunctionLocalState>(*parameters.context);
-    } else {
-        return make_uniq<JSONFunctionLocalState>(Allocator::DefaultAllocator());
-    }
-}
-
 static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
     auto &lstate = parameters.local_state->Cast<JSONFunctionLocalState>();
     lstate.json_allocator.Reset();
@@ -215,15 +215,17 @@ static bool CastVarcharToJSON(Vector &source, Vector &result, idx_t count, CastP
     source, result, count, [&](string_t input, ValidityMask &mask, idx_t idx) {
         auto data = (char *)(input.GetData());
         auto length = input.GetSize();
-        yyjson_read_err error;
 
+        yyjson_read_err error;
         auto doc = JSONCommon::ReadDocumentUnsafe(data, length, JSONCommon::READ_FLAG, alc, &error);
 
         if (!doc) {
-            HandleCastError::AssignError(JSONCommon::FormatParseError(data, length, error),
-                                         parameters.error_message);
             mask.SetInvalid(idx);
-            success = false;
+            if (success) {
+                HandleCastError::AssignError(JSONCommon::FormatParseError(data, length, error),
+                                             parameters.error_message);
+                success = false;
+            }
         }
         return input;
     });
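The reordering above changes the error-reporting policy of the VARCHAR-to-JSON cast: every malformed row is still marked invalid, but the comparatively expensive error formatting now runs only for the first failure. A minimal standalone sketch of the same pattern, with a stand-in parse instead of the real yyjson call:

    #include <string>
    #include <vector>

    // "row_parses" stands in for the real ReadDocumentUnsafe; the shape of the
    // loop is what matters: mark every bad row, but format and assign only the
    // first error so later failures don't overwrite it.
    bool CastAllRows(const std::vector<std::string> &rows, std::vector<bool> &valid, std::string &error) {
        bool success = true;
        for (size_t i = 0; i < rows.size(); i++) {
            bool row_parses = !rows[i].empty(); // stand-in for the JSON parse
            if (!row_parses) {
                valid[i] = false; // always record invalidity
                if (success) {    // only the first failure pays for formatting
                    error = "malformed JSON in row " + std::to_string(i);
                    success = false;
                }
            }
        }
        return success;
    }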
@@ -231,13 +233,13 @@
     return success;
 }
 
-void JSONFunctions::RegisterCastFunctions(CastFunctionSet &casts) {
+void JSONFunctions::RegisterSimpleCastFunctions(CastFunctionSet &casts) {
     // JSON to VARCHAR is basically free
     casts.RegisterCastFunction(JSONCommon::JSONType(), LogicalType::VARCHAR, DefaultCasts::ReinterpretCast, 1);
 
     // VARCHAR to JSON requires a parse so it's not free. Let's make it 1 more than a cast to STRUCT
     auto varchar_to_json_cost = casts.ImplicitCastCost(LogicalType::SQLNULL, LogicalTypeId::STRUCT) + 1;
-    BoundCastInfo info(CastVarcharToJSON, nullptr, InitJSONCastLocalState);
+    BoundCastInfo info(CastVarcharToJSON, nullptr, JSONFunctionLocalState::InitCastLocalState);
     casts.RegisterCastFunction(LogicalType::VARCHAR, JSONCommon::JSONType(), std::move(info), varchar_to_json_cost);
 
     // Register NULL to JSON with a different cost than NULL to VARCHAR so the binder can disambiguate functions
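Promoting the file-local InitJSONCastLocalState to the static member JSONFunctionLocalState::InitCastLocalState lets other translation units of the extension (the enlarged json_create.cpp and json_transform.cpp above) reuse the same initializer when they register casts. For reference, this is the general shape of registering a cast with per-cast local state, a sketch assuming the JSON extension's internal headers; MyCast and the cost value 100 are illustrative, only the BoundCastInfo/RegisterCastFunction calls mirror the diff:

    #include "json_functions.hpp" // extension-internal header, as in the files above

    namespace duckdb {

    // hypothetical cast function; the signature follows CastVarcharToJSON above
    static bool MyCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
        // parameters.local_state was created by the init callback registered below
        auto &lstate = parameters.local_state->Cast<JSONFunctionLocalState>();
        (void)lstate; // ... perform the conversion using the local state ...
        return true;
    }

    static void RegisterMyCast(CastFunctionSet &casts) {
        // cast function, bind-data init (none here), local-state init
        BoundCastInfo info(MyCast, nullptr, JSONFunctionLocalState::InitCastLocalState);
        // the final argument is the implicit-cast cost consulted by the binder
        casts.RegisterCastFunction(LogicalType::VARCHAR, JSONCommon::JSONType(), std::move(info), 100);
    }

    } // namespace duckdb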
@@ -1,10 +1,10 @@
 #include "json_scan.hpp"
 
+#include "duckdb/common/multi_file_reader.hpp"
 #include "duckdb/main/database.hpp"
 #include "duckdb/main/extension_helper.hpp"
 #include "duckdb/parallel/task_scheduler.hpp"
 #include "duckdb/storage/buffer_manager.hpp"
-#include "duckdb/common/multi_file_reader.hpp"
 
 namespace duckdb {
 
@@ -59,11 +59,15 @@ unique_ptr<FunctionData> JSONScanData::Bind(ClientContext &context, TableFunctio
 }
 
 void JSONScanData::InitializeFormats() {
+    InitializeFormats(auto_detect);
+}
+
+void JSONScanData::InitializeFormats(bool auto_detect_p) {
     // Set defaults for date/timestamp formats if we need to
-    if (!auto_detect && date_format.empty()) {
+    if (!auto_detect_p && date_format.empty()) {
         date_format = "%Y-%m-%d";
     }
-    if (!auto_detect && timestamp_format.empty()) {
+    if (!auto_detect_p && timestamp_format.empty()) {
         timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ";
     }
 
@@ -75,7 +79,7 @@ void JSONScanData::InitializeFormats() {
         date_format_map.AddFormat(LogicalTypeId::TIMESTAMP, timestamp_format);
     }
 
-    if (auto_detect) {
+    if (auto_detect_p) {
         static const unordered_map<LogicalTypeId, vector<const char *>, LogicalTypeIdHash> FORMAT_TEMPLATES = {
             {LogicalTypeId::DATE, {"%m-%d-%Y", "%m-%d-%y", "%d-%m-%Y", "%d-%m-%y", "%Y-%m-%d", "%y-%m-%d"}},
             {LogicalTypeId::TIMESTAMP,
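The new InitializeFormats(bool) overload lets a caller force or suppress format auto-detection independently of the scan's own auto_detect flag, while the defaults keep strptime-style specifiers ("%Y-%m-%d" for dates; "%f" in the timestamp format is the fractional-seconds extension). As a rough standalone illustration of what the default date format accepts, using plain C++ rather than DuckDB's own parser:

    #include <ctime>
    #include <iomanip>
    #include <iostream>
    #include <sstream>

    int main() {
        std::tm tm = {};
        std::istringstream ss("2023-04-28");
        ss >> std::get_time(&tm, "%Y-%m-%d"); // same specifier syntax as date_format
        std::cout << (ss.fail() ? "no match" : "parsed") << "\n"; // prints "parsed"
        return 0;
    }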
@@ -589,8 +589,8 @@ StringColumnReader::StringColumnReader(ParquetReader &reader, LogicalType type_p
     }
 }
 
-uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) {
-    if (Type() != LogicalTypeId::VARCHAR) {
+uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, const bool is_varchar) {
+    if (!is_varchar) {
         return str_len;
     }
     // verify if a string is actually UTF8, and if there are no null bytes in the middle of the string
@@ -605,6 +605,10 @@ uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len
     return str_len;
 }
 
+uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) {
+    return VerifyString(str_data, str_len, Type() == LogicalTypeId::VARCHAR);
+}
+
 void StringColumnReader::Dictionary(shared_ptr<ResizeableBuffer> data, idx_t num_entries) {
     dict = std::move(data);
     dict_strings = duckdb::unique_ptr<string_t[]>(new string_t[num_entries]);
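The split gives VerifyString a static "workhorse" overload that needs no reader instance, plus the existing instance method that now merely supplies the type check; parquet_statistics.cpp further down uses the static form to validate min/max statistics strings before they enter string stats. A minimal sketch of the idiom with illustrative types, not the actual reader:

    #include <cstdint>

    struct ReaderSketch {
        // static workhorse: callable without an object, e.g. from statistics code
        static uint32_t VerifyString(const char *data, uint32_t len, const bool is_varchar) {
            if (!is_varchar) {
                return len; // non-VARCHAR data: nothing to check
            }
            // ... UTF-8 / embedded-NUL validation would go here ...
            return len;
        }
        // instance wrapper: supplies the context the static overload cannot know
        uint32_t VerifyString(const char *data, uint32_t len) {
            return VerifyString(data, len, is_varchar_column); // resolves to the 3-arg overload
        }
        bool is_varchar_column = true;
    };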
@@ -80,15 +80,14 @@ public:
 
 class ParquetReader {
 public:
-    ParquetReader(Allocator &allocator, unique_ptr<FileHandle> file_handle_p);
     ParquetReader(ClientContext &context, string file_name, ParquetOptions parquet_options);
     ParquetReader(ClientContext &context, ParquetOptions parquet_options,
                   shared_ptr<ParquetFileMetadataCache> metadata);
     ~ParquetReader();
 
+    FileSystem &fs;
     Allocator &allocator;
     string file_name;
-    FileOpener *file_opener;
     vector<LogicalType> return_types;
     vector<string> names;
     shared_ptr<ParquetFileMetadataCache> metadata;
@@ -32,8 +32,8 @@ struct PreparedRowGroup {
 
 class ParquetWriter {
 public:
-    ParquetWriter(FileSystem &fs, string file_name, FileOpener *file_opener, vector<LogicalType> types,
-                  vector<string> names, duckdb_parquet::format::CompressionCodec::type codec);
+    ParquetWriter(FileSystem &fs, string file_name, vector<LogicalType> types, vector<string> names,
+                  duckdb_parquet::format::CompressionCodec::type codec);
 
 public:
     void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result);
@@ -39,6 +39,7 @@ public:
     void PrepareDeltaByteArray(ResizeableBuffer &buffer) override;
     void DeltaByteArray(uint8_t *defines, idx_t num_values, parquet_filter_t &filter, idx_t result_offset,
                         Vector &result) override;
+    static uint32_t VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar);
     uint32_t VerifyString(const char *str_data, uint32_t str_len);
 
 protected:
@@ -51,8 +51,7 @@ struct ReadHeadComparator {
 // 1: register all ranges that will be read, merging ranges that are consecutive
 // 2: prefetch all registered ranges
 struct ReadAheadBuffer {
-    ReadAheadBuffer(Allocator &allocator, FileHandle &handle, FileOpener &opener)
-        : allocator(allocator), handle(handle), file_opener(opener) {
+    ReadAheadBuffer(Allocator &allocator, FileHandle &handle) : allocator(allocator), handle(handle) {
     }
 
     // The list of read heads
@@ -62,7 +61,6 @@
 
     Allocator &allocator;
     FileHandle &handle;
-    FileOpener &file_opener;
 
     idx_t total_size = 0;
 
@@ -124,8 +122,8 @@ class ThriftFileTransport : public duckdb_apache::thrift::transport::TVirtualTra
 public:
     static constexpr uint64_t PREFETCH_FALLBACK_BUFFERSIZE = 1000000;
 
-    ThriftFileTransport(Allocator &allocator, FileHandle &handle_p, FileOpener &opener, bool prefetch_mode_p)
-        : handle(handle_p), location(0), allocator(allocator), ra_buffer(ReadAheadBuffer(allocator, handle_p, opener)),
+    ThriftFileTransport(Allocator &allocator, FileHandle &handle_p, bool prefetch_mode_p)
+        : handle(handle_p), location(0), allocator(allocator), ra_buffer(ReadAheadBuffer(allocator, handle_p)),
           prefetch_mode(prefetch_mode_p) {
     }
 
@@ -239,8 +239,7 @@ public:
         // missing metadata entry in cache, no usable stats
         return nullptr;
     }
-    auto handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
-                              FileSystem::DEFAULT_COMPRESSION, FileSystem::GetFileOpener(context));
+    auto handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ);
     // we need to check if the metadata cache entries are current
     if (fs.GetLastModifiedTime(*handle) >= metadata->read_time) {
         // missing or invalid metadata entry in cache, no usable stats overall
@@ -627,8 +626,7 @@ unique_ptr<GlobalFunctionData> ParquetWriteInitializeGlobal(ClientContext &conte
 
     auto &fs = FileSystem::GetFileSystem(context);
     global_state->writer =
-        make_uniq<ParquetWriter>(fs, file_path, FileSystem::GetFileOpener(context), parquet_bind.sql_types,
-                                 parquet_bind.column_names, parquet_bind.codec);
+        make_uniq<ParquetWriter>(fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, parquet_bind.codec);
     return std::move(global_state);
 }
 
@@ -49,16 +49,15 @@ using duckdb_parquet::format::Statistics;
 using duckdb_parquet::format::Type;
 
 static duckdb::unique_ptr<duckdb_apache::thrift::protocol::TProtocol>
-CreateThriftProtocol(Allocator &allocator, FileHandle &file_handle, FileOpener &opener, bool prefetch_mode) {
-    auto transport = make_shared<ThriftFileTransport>(allocator, file_handle, opener, prefetch_mode);
+CreateThriftProtocol(Allocator &allocator, FileHandle &file_handle, bool prefetch_mode) {
+    auto transport = make_shared<ThriftFileTransport>(allocator, file_handle, prefetch_mode);
     return make_uniq<duckdb_apache::thrift::protocol::TCompactProtocolT<ThriftFileTransport>>(std::move(transport));
 }
 
-static shared_ptr<ParquetFileMetadataCache> LoadMetadata(Allocator &allocator, FileHandle &file_handle,
-                                                         FileOpener &opener) {
+static shared_ptr<ParquetFileMetadataCache> LoadMetadata(Allocator &allocator, FileHandle &file_handle) {
     auto current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
 
-    auto proto = CreateThriftProtocol(allocator, file_handle, opener, false);
+    auto proto = CreateThriftProtocol(allocator, file_handle, false);
     auto &transport = ((ThriftFileTransport &)*proto->getTransport());
     auto file_size = transport.GetSize();
     if (file_size < 12) {
@@ -428,20 +427,11 @@ ParquetOptions::ParquetOptions(ClientContext &context) {
     }
 }
 
-ParquetReader::ParquetReader(Allocator &allocator_p, unique_ptr<FileHandle> file_handle_p) : allocator(allocator_p) {
-    file_name = file_handle_p->path;
-    file_handle = std::move(file_handle_p);
-    metadata = LoadMetadata(allocator, *file_handle, *file_opener);
-    InitializeSchema();
-}
-
 ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, ParquetOptions parquet_options_p)
-    : allocator(BufferAllocator::Get(context_p)), file_opener(FileSystem::GetFileOpener(context_p)),
+    : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
       parquet_options(parquet_options_p) {
-    auto &fs = FileSystem::GetFileSystem(context_p);
     file_name = std::move(file_name_p);
-    file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK,
-                              FileSystem::DEFAULT_COMPRESSION, file_opener);
+    file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ);
     if (!file_handle->CanSeek()) {
         throw NotImplementedException(
             "Reading parquet files from a FIFO stream is not supported and cannot be efficiently supported since "
@@ -451,12 +441,12 @@ ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, Parqu
     // or if this file has cached metadata
     // or if the cached version already expired
     if (!ObjectCache::ObjectCacheEnabled(context_p)) {
-        metadata = LoadMetadata(allocator, *file_handle, *file_opener);
+        metadata = LoadMetadata(allocator, *file_handle);
     } else {
         auto last_modify_time = fs.GetLastModifiedTime(*file_handle);
         metadata = ObjectCache::GetObjectCache(context_p).Get<ParquetFileMetadataCache>(file_name);
         if (!metadata || (last_modify_time + 10 >= metadata->read_time)) {
-            metadata = LoadMetadata(allocator, *file_handle, *file_opener);
+            metadata = LoadMetadata(allocator, *file_handle);
             ObjectCache::GetObjectCache(context_p).Put(file_name, metadata);
         }
     }
@@ -466,7 +456,7 @@
 
 ParquetReader::ParquetReader(ClientContext &context_p, ParquetOptions parquet_options_p,
                              shared_ptr<ParquetFileMetadataCache> metadata_p)
-    : allocator(BufferAllocator::Get(context_p)), file_opener(FileSystem::GetFileOpener(context_p)),
+    : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)),
       metadata(std::move(metadata_p)), parquet_options(parquet_options_p) {
     InitializeSchema();
 }
@@ -634,11 +624,10 @@ void ParquetReader::InitializeScan(ParquetReaderScanState &state, vector<idx_t>
         state.prefetch_mode = false;
     }
 
-        state.file_handle = file_handle->file_system.OpenFile(file_handle->path, flags, FileSystem::DEFAULT_LOCK,
-                                                              FileSystem::DEFAULT_COMPRESSION, file_opener);
+        state.file_handle = fs.OpenFile(file_handle->path, flags);
     }
 
-    state.thrift_file_proto = CreateThriftProtocol(allocator, *state.file_handle, *file_opener, state.prefetch_mode);
+    state.thrift_file_proto = CreateThriftProtocol(allocator, *state.file_handle, state.prefetch_mode);
    state.root_reader = CreateReader();
    state.define_buf.resize(allocator, STANDARD_VECTOR_SIZE);
    state.repeat_buf.resize(allocator, STANDARD_VECTOR_SIZE);
@@ -1,6 +1,7 @@
 #include "parquet_statistics.hpp"
 #include "parquet_decimal_utils.hpp"
 #include "parquet_timestamp.hpp"
+#include "string_column_reader.hpp"
 #include "duckdb.hpp"
 #ifndef DUCKDB_AMALGAMATION
 #include "duckdb/common/types/blob.hpp"
@@ -253,15 +254,19 @@ unique_ptr<BaseStatistics> ParquetStatisticsUtils::TransformColumnStatistics(con
     case LogicalTypeId::VARCHAR: {
         auto string_stats = StringStats::CreateEmpty(type);
         if (parquet_stats.__isset.min) {
+            StringColumnReader::VerifyString(parquet_stats.min.c_str(), parquet_stats.min.size(), true);
             StringStats::Update(string_stats, parquet_stats.min);
         } else if (parquet_stats.__isset.min_value) {
+            StringColumnReader::VerifyString(parquet_stats.min_value.c_str(), parquet_stats.min_value.size(), true);
             StringStats::Update(string_stats, parquet_stats.min_value);
         } else {
             return nullptr;
         }
         if (parquet_stats.__isset.max) {
+            StringColumnReader::VerifyString(parquet_stats.max.c_str(), parquet_stats.max.size(), true);
             StringStats::Update(string_stats, parquet_stats.max);
         } else if (parquet_stats.__isset.max_value) {
+            StringColumnReader::VerifyString(parquet_stats.max_value.c_str(), parquet_stats.max_value.size(), true);
             StringStats::Update(string_stats, parquet_stats.max_value);
         } else {
             return nullptr;
@@ -225,12 +225,12 @@ void VerifyUniqueNames(const vector<string> &names) {
 #endif
 }
 
-ParquetWriter::ParquetWriter(FileSystem &fs, string file_name_p, FileOpener *file_opener_p, vector<LogicalType> types_p,
-                             vector<string> names_p, CompressionCodec::type codec)
+ParquetWriter::ParquetWriter(FileSystem &fs, string file_name_p, vector<LogicalType> types_p, vector<string> names_p,
+                             CompressionCodec::type codec)
     : file_name(std::move(file_name_p)), sql_types(std::move(types_p)), column_names(std::move(names_p)), codec(codec) {
     // initialize the file writer
-    writer = make_uniq<BufferedFileWriter>(
-        fs, file_name.c_str(), FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW, file_opener_p);
+    writer = make_uniq<BufferedFileWriter>(fs, file_name.c_str(),
+                                           FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW);
     // parquet files start with the string "PAR1"
     writer->WriteData((const_data_ptr_t) "PAR1", 4);
     TCompactProtocolFactoryT<MyTransport> tproto_factory;
@@ -721,9 +721,9 @@ TableStorageInfo DuckTableEntry::GetStorageInfo(ClientContext &context) {
     storage->info->indexes.Scan([&](Index &index) {
         IndexInfo info;
         info.is_primary = index.IsPrimary();
-        info.is_unique = index.IsUnique();
+        info.is_unique = index.IsUnique() || info.is_primary;
         info.is_foreign = index.IsForeign();
-        index.column_id_set = index.column_id_set;
+        info.column_set = index.column_id_set;
         result.index_info.push_back(std::move(info));
         return false;
     });
@@ -621,7 +621,7 @@ void CatalogSet::Undo(CatalogEntry &entry) {
         auto &dependency_manager = catalog.GetDependencyManager();
         dependency_manager.EraseObject(to_be_removed_node);
     }
-    if (entry.name != to_be_removed_node.name) {
+    if (!StringUtil::CIEquals(entry.name, to_be_removed_node.name)) {
         // rename: clean up the new name when the rename is rolled back
         auto removed_entry = mapping.find(to_be_removed_node.name);
         if (removed_entry->second->child) {
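The comparison becomes case-insensitive because DuckDB treats catalog identifiers case-insensitively, so undoing a rename that differs only in case must not be handled as a real rename. A standalone sketch of the comparison semantics; StringUtil::CIEquals is the real helper, this version only illustrates its behaviour:

    #include <cctype>
    #include <string>

    static bool CIEqualsSketch(const std::string &a, const std::string &b) {
        if (a.size() != b.size()) {
            return false;
        }
        for (size_t i = 0; i < a.size(); i++) {
            if (std::tolower((unsigned char)a[i]) != std::tolower((unsigned char)b[i])) {
                return false;
            }
        }
        return true; // CIEqualsSketch("t1", "T1") == true, unlike operator==
    }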
@@ -40,11 +40,8 @@ FileSystem::~FileSystem() {
 }
 
 FileSystem &FileSystem::GetFileSystem(ClientContext &context) {
-    return FileSystem::GetFileSystem(*context.db);
-}
-
-FileOpener *FileSystem::GetFileOpener(ClientContext &context) {
-    return ClientData::Get(context).file_opener.get();
+    auto &client_data = ClientData::Get(context);
+    return *client_data.client_file_system;
 }
 
 bool PathMatched(const string &path, const string &sub_path) {
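This change is the hub of the FileOpener removal that runs through the whole diff: GetFileSystem(ClientContext &) now returns a per-client file system stored in ClientData, presumably built on the new opener_file_system.hpp header (+118 lines in the file list), so the opener is injected once rather than threaded through every call site. A much-simplified sketch of that wrapper idea, not the actual 118-line header:

    #include "duckdb/common/file_system.hpp"

    namespace duckdb {

    // Sketch only: a FileSystem decorator that forwards to the database file
    // system while supplying the client's FileOpener on every OpenFile call.
    class OpenerFileSystemSketch : public FileSystem {
    public:
        OpenerFileSystemSketch(FileSystem &inner_p, FileOpener *opener_p) : inner(inner_p), opener(opener_p) {
        }

        unique_ptr<FileHandle> OpenFile(const string &path, uint8_t flags, FileLockType lock,
                                        FileCompressionType compression, FileOpener *) override {
            // ignore any opener passed by the caller; use the client's instead
            return inner.OpenFile(path, flags, lock, compression, opener);
        }
        // ... every other virtual (Glob, ListFiles, ...) would forward the same way ...

        std::string GetName() const override {
            return "OpenerFileSystemSketch";
        }

    private:
        FileSystem &inner;
        FileOpener *opener;
    };

    } // namespace duckdb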
@@ -193,7 +190,7 @@ string FileSystem::ExtractBaseName(const string &path) {
     return vec[0];
 }
 
-string FileSystem::GetHomeDirectory(FileOpener *opener) {
+string FileSystem::GetHomeDirectory(optional_ptr<FileOpener> opener) {
     // read the home_directory setting first, if it is set
     if (opener) {
         Value result;
@@ -215,7 +212,11 @@ string FileSystem::GetHomeDirectory(FileOpener *opener) {
     return string();
 }
 
-string FileSystem::ExpandPath(const string &path, FileOpener *opener) {
+string FileSystem::GetHomeDirectory() {
+    return GetHomeDirectory(nullptr);
+}
+
+string FileSystem::ExpandPath(const string &path, optional_ptr<FileOpener> opener) {
     if (path.empty()) {
         return path;
     }
@@ -225,6 +226,10 @@ string FileSystem::ExpandPath(const string &path, FileOpener *opener) {
     return path;
 }
 
+string FileSystem::ExpandPath(const string &path) {
+    return FileSystem::ExpandPath(path, nullptr);
+}
+
 // LCOV_EXCL_START
 unique_ptr<FileHandle> FileSystem::OpenFile(const string &path, uint8_t flags, FileLockType lock,
                                             FileCompressionType compression, FileOpener *opener) {
@@ -247,14 +252,6 @@ int64_t FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes) {
     throw NotImplementedException("%s: Write is not implemented!", GetName());
 }
 
-string FileSystem::GetFileExtension(FileHandle &handle) {
-    auto dot_location = handle.path.rfind('.');
-    if (dot_location != std::string::npos) {
-        return handle.path.substr(dot_location + 1, std::string::npos);
-    }
-    return string();
-}
-
 int64_t FileSystem::GetFileSize(FileHandle &handle) {
     throw NotImplementedException("%s: GetFileSize is not implemented!", GetName());
 }
@@ -312,10 +309,6 @@ vector<string> FileSystem::Glob(const string &path, FileOpener *opener) {
     throw NotImplementedException("%s: Glob is not implemented!", GetName());
 }
 
-vector<string> FileSystem::Glob(const string &path, ClientContext &context) {
-    return Glob(path, GetFileOpener(context));
-}
-
 void FileSystem::RegisterSubSystem(unique_ptr<FileSystem> sub_fs) {
     throw NotImplementedException("%s: Can't register a sub system on a non-virtual file system", GetName());
 }
@@ -337,7 +330,7 @@ bool FileSystem::CanHandleFile(const string &fpath) {
 }
 
 vector<string> FileSystem::GlobFiles(const string &pattern, ClientContext &context, FileGlobOptions options) {
-    auto result = Glob(pattern, context);
+    auto result = Glob(pattern);
     if (result.empty()) {
         string required_extension;
         const string prefixes[] = {"http://", "https://", "s3://"};
@@ -8,9 +8,9 @@ namespace duckdb {
 // Remove this when we switch C++17: https://stackoverflow.com/a/53350948
 constexpr uint8_t BufferedFileWriter::DEFAULT_OPEN_FLAGS;
 
-BufferedFileWriter::BufferedFileWriter(FileSystem &fs, const string &path_p, uint8_t open_flags, FileOpener *opener)
+BufferedFileWriter::BufferedFileWriter(FileSystem &fs, const string &path_p, uint8_t open_flags)
     : fs(fs), path(path_p), data(unique_ptr<data_t[]>(new data_t[FILE_BUFFER_SIZE])), offset(0), total_written(0) {
-    handle = fs.OpenFile(path, open_flags, FileLockType::WRITE_LOCK, FileSystem::DEFAULT_COMPRESSION, opener);
+    handle = fs.OpenFile(path, open_flags, FileLockType::WRITE_LOCK);
 }
 
 int64_t BufferedFileWriter::GetFileSize() {
@@ -60,11 +60,15 @@ static void AppendFilteredToResult(Vector &lambda_vector, list_entry_t *result_e
 
     idx_t true_count = 0;
     SelectionVector true_sel(elem_cnt);
-    auto lambda_values = FlatVector::GetData<bool>(lambda_vector);
-    auto &lambda_validity = FlatVector::Validity(lambda_vector);
+    UnifiedVectorFormat lambda_data;
+    lambda_vector.ToUnifiedFormat(elem_cnt, lambda_data);
+
+    auto lambda_values = (bool *)lambda_data.data;
+    auto &lambda_validity = lambda_data.validity;
 
     // compute the new lengths and offsets, and create a selection vector
     for (idx_t i = 0; i < elem_cnt; i++) {
+        auto entry = lambda_data.sel->get_index(i);
 
         while (appended_lists_cnt < lists_len.size() && lists_len[appended_lists_cnt] == 0) {
             result_entries[appended_lists_cnt].offset = curr_list_offset;
@@ -73,12 +77,11 @@ static void AppendFilteredToResult(Vector &lambda_vector, list_entry_t *result_e
         }
 
         // found a true value
-        if (lambda_validity.RowIsValid(i)) {
-            if (lambda_values[i] > 0) {
-                true_sel.set_index(true_count++, i);
-                curr_list_len++;
-            }
+        if (lambda_validity.RowIsValid(entry) && lambda_values[entry] > 0) {
+            true_sel.set_index(true_count++, i);
+            curr_list_len++;
         }
+
         curr_original_list_len++;
 
         if (lists_len[appended_lists_cnt] == curr_original_list_len) {
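The fix swaps the FlatVector accessors for UnifiedVectorFormat, which canonicalizes flat, constant, and dictionary vectors behind a single selection-vector indirection; note how the loop now reads through lambda_data.sel->get_index(i). The same access pattern in isolation, assuming DuckDB's internal vector header:

    #include "duckdb/common/types/vector.hpp" // internal header, as used by the file above

    namespace duckdb {

    // Count the true rows of a boolean vector of any layout (flat, constant, dictionary).
    static idx_t CountTrueRows(Vector &input, idx_t count) {
        UnifiedVectorFormat format;
        input.ToUnifiedFormat(count, format);
        auto data = (bool *)format.data;
        idx_t result = 0;
        for (idx_t i = 0; i < count; i++) {
            auto idx = format.sel->get_index(i); // physical slot of logical row i
            if (format.validity.RowIsValid(idx) && data[idx]) {
                result++;
            }
        }
        return result;
    }

    } // namespace duckdb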
@@ -74,6 +74,9 @@ static idx_t BetweenLoopTypeSwitch(Vector &input, Vector &lower, Vector &upper,
     case PhysicalType::VARCHAR:
         return TernaryExecutor::Select<string_t, string_t, string_t, OP>(input, lower, upper, sel, count, true_sel,
                                                                          false_sel);
+    case PhysicalType::INTERVAL:
+        return TernaryExecutor::Select<interval_t, interval_t, interval_t, OP>(input, lower, upper, sel, count,
+                                                                               true_sel, false_sel);
     default:
         throw InvalidTypeException(input.GetType(), "Invalid type for BETWEEN");
     }
@@ -130,6 +130,9 @@ static void TemplatedGenerateKeys(ArenaAllocator &allocator, Vector &input, idx_
         auto idx = idata.sel->get_index(i);
         if (idata.validity.RowIsValid(idx)) {
             ARTKey::CreateARTKey<T>(allocator, input.GetType(), keys[i], input_data[idx]);
+        } else {
+            // we need to possibly reset the former key value in the keys vector
+            keys[i] = ARTKey();
         }
     }
 }
@@ -680,7 +683,6 @@ Node ART::Lookup(Node node, const ARTKey &key, idx_t depth) {
         }
         return node;
     }
-
     auto &node_prefix = node.GetPrefix(*this);
     if (node_prefix.count) {
         for (idx_t pos = 0; pos < node_prefix.count; pos++) {
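The key-generation fix above guards against stale state in a reused buffer: the keys array persists across chunks, so a row whose value is NULL must overwrite its slot with an empty ARTKey or the previous chunk's key leaks through (the matching operator-side fix is state.join_keys.Reset() in the next hunk). The pitfall in miniature, with std::optional standing in for the validity mask:

    #include <optional>
    #include <vector>

    // keys outlives each batch; every slot must be (re)written each round,
    // including the "NULL" case, mirroring keys[i] = ARTKey() in the diff.
    void FillKeys(const std::vector<std::optional<int>> &batch, std::vector<int> &keys) {
        keys.assign(batch.size(), 0); // reset all slots up front
        for (size_t i = 0; i < batch.size(); i++) {
            if (batch[i]) {
                keys[i] = *batch[i];
            }
            // else: keys[i] stays 0 instead of retaining last batch's value
        }
    }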
@@ -167,7 +167,6 @@ void PhysicalIndexJoin::GetRHSMatches(ExecutionContext &context, DataChunk &inpu
 
     auto &state = state_p.Cast<IndexJoinOperatorState>();
     auto &art = index.Cast<ART>();
-    ;
 
     // generate the keys for this chunk
     state.arena_allocator.Reset();
@@ -214,6 +213,8 @@ OperatorResultType PhysicalIndexJoin::ExecuteInternal(ExecutionContext &context,
         state.lhs_idx = 0;
         state.rhs_idx = 0;
         state.first_fetch = true;
+        // reset the LHS chunk to reset the validity masks
+        state.join_keys.Reset();
         return OperatorResultType::NEED_MORE_INPUT;
     }
     //! Output vectors
@@ -35,7 +35,7 @@ string BaseCSVReader::GetLineNumberStr(idx_t line_error, bool is_line_estimated,
 BaseCSVReader::BaseCSVReader(ClientContext &context_p, BufferedCSVReaderOptions options_p,
                              const vector<LogicalType> &requested_types)
     : context(context_p), fs(FileSystem::GetFileSystem(context)), allocator(Allocator::Get(context)),
-      opener(FileSystem::GetFileOpener(context)), options(std::move(options_p)) {
+      options(std::move(options_p)) {
 }
 
 BaseCSVReader::~BaseCSVReader() {
@@ -43,7 +43,7 @@ BaseCSVReader::~BaseCSVReader() {
 
 unique_ptr<CSVFileHandle> BaseCSVReader::OpenCSV(const BufferedCSVReaderOptions &options_p) {
     auto file_handle = fs.OpenFile(options_p.file_path.c_str(), FileFlags::FILE_FLAGS_READ, FileLockType::NO_LOCK,
-                                   options_p.compression, this->opener);
+                                   options_p.compression);
     if (file_handle->CanSeek()) {
        file_handle->Reset();
    }
@@ -636,10 +636,10 @@ void ParallelCSVReader::ParseCSV(DataChunk &insert_chunk) {
 }
 
 idx_t ParallelCSVReader::GetLineError(idx_t line_error, idx_t buffer_idx) {
-
     while (true) {
         if (buffer->line_info->CanItGetLine(file_idx, buffer_idx)) {
             auto cur_start = verification_positions.beginning_of_first_line + buffer->buffer->GetCSVGlobalStart();
+            // line errors are 1-indexed
             return buffer->line_info->GetLine(buffer_idx, line_error, file_idx, cur_start, false);
         }
     }
@@ -207,8 +207,7 @@ unique_ptr<GlobalSinkState> PhysicalCopyToFile::GetGlobalSinkState(ClientContext
         fs.CreateDirectory(file_path);
     } else if (!overwrite_or_ignore) {
         idx_t n_files = 0;
-        fs.ListFiles(
-            file_path, [&n_files](const string &path, bool) { n_files++; }, FileOpener::Get(context));
+        fs.ListFiles(file_path, [&n_files](const string &path, bool) { n_files++; });
         if (n_files > 0) {
             throw IOException("Directory %s is not empty! Enable OVERWRITE_OR_IGNORE option to force writing",
                               file_path);
@@ -27,10 +27,10 @@ static void WriteCatalogEntries(stringstream &ss, vector<reference<CatalogEntry>
     ss << std::endl;
 }
 
-static void WriteStringStreamToFile(FileSystem &fs, FileOpener *opener, stringstream &ss, const string &path) {
+static void WriteStringStreamToFile(FileSystem &fs, stringstream &ss, const string &path) {
     auto ss_string = ss.str();
     auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW,
-                              FileLockType::WRITE_LOCK, FileSystem::DEFAULT_COMPRESSION, opener);
+                              FileLockType::WRITE_LOCK);
     fs.Write(*handle, (void *)ss_string.c_str(), ss_string.size());
     handle.reset();
 }
@@ -108,7 +108,6 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c
 
     auto &ccontext = context.client;
     auto &fs = FileSystem::GetFileSystem(ccontext);
-    auto *opener = FileSystem::GetFileOpener(ccontext);
 
     // gather all catalog types to export
     vector<reference<CatalogEntry>> schemas;
@@ -172,7 +171,7 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c
     WriteCatalogEntries(ss, indexes);
     WriteCatalogEntries(ss, macros);
 
-    WriteStringStreamToFile(fs, opener, ss, fs.JoinPath(info->file_path, "schema.sql"));
+    WriteStringStreamToFile(fs, ss, fs.JoinPath(info->file_path, "schema.sql"));
 
     // write the load.sql file
     // for every table, we write COPY INTO statement with the specified options
@@ -181,7 +180,7 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c
         auto exported_table_info = exported_tables.data[i].table_data;
         WriteCopyStatement(fs, load_ss, *info, exported_table_info, function);
     }
-    WriteStringStreamToFile(fs, opener, load_ss, fs.JoinPath(info->file_path, "load.sql"));
+    WriteStringStreamToFile(fs, load_ss, fs.JoinPath(info->file_path, "load.sql"));
     state.finished = true;
 
     return SourceResultType::FINISHED;
@@ -11,7 +11,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile
     bool preserve_insertion_order = PhysicalPlanGenerator::PreserveInsertionOrder(context, *plan);
     bool supports_batch_index = PhysicalPlanGenerator::UseBatchIndex(context, *plan);
     auto &fs = FileSystem::GetFileSystem(context);
-    op.file_path = fs.ExpandPath(op.file_path, FileSystem::GetFileOpener(context));
+    op.file_path = fs.ExpandPath(op.file_path);
     if (op.use_tmp_file) {
         op.file_path += ".tmp";
     }