duckdb 0.7.2-dev3515.0 → 0.7.2-dev3666.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/configure.py +2 -0
  2. package/package.json +1 -1
  3. package/src/database.cpp +1 -0
  4. package/src/duckdb/extension/json/buffered_json_reader.cpp +56 -17
  5. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +56 -31
  6. package/src/duckdb/extension/json/include/json_common.hpp +5 -4
  7. package/src/duckdb/extension/json/include/json_executors.hpp +13 -18
  8. package/src/duckdb/extension/json/include/json_functions.hpp +3 -0
  9. package/src/duckdb/extension/json/include/json_scan.hpp +106 -153
  10. package/src/duckdb/extension/json/include/json_transform.hpp +2 -2
  11. package/src/duckdb/extension/json/json_common.cpp +1 -1
  12. package/src/duckdb/extension/json/json_functions/copy_json.cpp +94 -38
  13. package/src/duckdb/extension/json/json_functions/json_contains.cpp +7 -8
  14. package/src/duckdb/extension/json/json_functions/json_create.cpp +7 -7
  15. package/src/duckdb/extension/json/json_functions/json_merge_patch.cpp +4 -4
  16. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +4 -4
  17. package/src/duckdb/extension/json/json_functions/json_structure.cpp +7 -5
  18. package/src/duckdb/extension/json/json_functions/json_transform.cpp +10 -8
  19. package/src/duckdb/extension/json/json_functions/json_valid.cpp +1 -1
  20. package/src/duckdb/extension/json/json_functions/read_json.cpp +167 -169
  21. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +37 -16
  22. package/src/duckdb/extension/json/json_functions.cpp +11 -4
  23. package/src/duckdb/extension/json/json_scan.cpp +593 -374
  24. package/src/duckdb/extension/parquet/parquet-extension.cpp +5 -0
  25. package/src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp +42 -0
  26. package/src/duckdb/src/catalog/catalog_search_path.cpp +5 -0
  27. package/src/duckdb/src/catalog/catalog_set.cpp +1 -1
  28. package/src/duckdb/src/common/constants.cpp +1 -0
  29. package/src/duckdb/src/common/file_system.cpp +26 -6
  30. package/src/duckdb/src/common/local_file_system.cpp +0 -13
  31. package/src/duckdb/src/common/types/vector.cpp +3 -3
  32. package/src/duckdb/src/common/types/vector_buffer.cpp +11 -3
  33. package/src/duckdb/src/common/types/vector_cache.cpp +5 -5
  34. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +12 -6
  35. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +10 -0
  36. package/src/duckdb/src/execution/operator/schema/physical_create_type.cpp +2 -2
  37. package/src/duckdb/src/function/macro_function.cpp +43 -0
  38. package/src/duckdb/src/function/pragma/pragma_queries.cpp +5 -3
  39. package/src/duckdb/src/function/scalar/strftime_format.cpp +1 -0
  40. package/src/duckdb/src/function/scalar_macro_function.cpp +10 -0
  41. package/src/duckdb/src/function/table/copy_csv.cpp +68 -18
  42. package/src/duckdb/src/function/table/read_csv.cpp +30 -3
  43. package/src/duckdb/src/function/table/version/pragma_version.cpp +8 -2
  44. package/src/duckdb/src/function/table_macro_function.cpp +10 -0
  45. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/column_dependency_manager.hpp +1 -1
  46. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/macro_catalog_entry.hpp +3 -1
  47. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp +0 -6
  48. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp +0 -6
  49. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
  50. package/src/duckdb/src/include/duckdb/catalog/similar_catalog_entry.hpp +1 -1
  51. package/src/duckdb/src/include/duckdb/common/constants.hpp +2 -0
  52. package/src/duckdb/src/include/duckdb/common/exception.hpp +3 -3
  53. package/src/duckdb/src/include/duckdb/common/field_writer.hpp +3 -3
  54. package/src/duckdb/src/include/duckdb/common/file_system.hpp +5 -0
  55. package/src/duckdb/src/include/duckdb/common/http_state.hpp +2 -1
  56. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +6 -6
  57. package/src/duckdb/src/include/duckdb/common/limits.hpp +46 -46
  58. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +8 -8
  59. package/src/duckdb/src/include/duckdb/common/operator/comparison_operators.hpp +6 -6
  60. package/src/duckdb/src/include/duckdb/common/operator/convert_to_string.hpp +1 -1
  61. package/src/duckdb/src/include/duckdb/common/operator/decimal_cast_operators.hpp +2 -4
  62. package/src/duckdb/src/include/duckdb/common/operator/string_cast.hpp +1 -1
  63. package/src/duckdb/src/include/duckdb/common/operator/subtract.hpp +1 -1
  64. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +1 -1
  65. package/src/duckdb/src/include/duckdb/common/re2_regex.hpp +1 -1
  66. package/src/duckdb/src/include/duckdb/common/string_util.hpp +7 -7
  67. package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +10 -10
  68. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +12 -12
  69. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_iterators.hpp +2 -2
  70. package/src/duckdb/src/include/duckdb/common/types/value.hpp +1 -1
  71. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +12 -2
  72. package/src/duckdb/src/include/duckdb/common/types.hpp +2 -2
  73. package/src/duckdb/src/include/duckdb/common/winapi.hpp +1 -1
  74. package/src/duckdb/src/include/duckdb/execution/expression_executor_state.hpp +1 -1
  75. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_reader_options.hpp +9 -5
  76. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_type.hpp +1 -1
  77. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +10 -14
  78. package/src/duckdb/src/include/duckdb/function/macro_function.hpp +7 -1
  79. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +3 -4
  80. package/src/duckdb/src/include/duckdb/function/scalar_macro_function.hpp +7 -2
  81. package/src/duckdb/src/include/duckdb/function/table_function.hpp +1 -1
  82. package/src/duckdb/src/include/duckdb/function/table_macro_function.hpp +5 -0
  83. package/src/duckdb/src/include/duckdb/function/udf_function.hpp +56 -50
  84. package/src/duckdb/src/include/duckdb/main/appender.hpp +2 -2
  85. package/src/duckdb/src/include/duckdb/main/client_context.hpp +2 -2
  86. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -1
  87. package/src/duckdb/src/include/duckdb/main/connection.hpp +8 -9
  88. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +1 -0
  89. package/src/duckdb/src/include/duckdb/main/query_result.hpp +3 -3
  90. package/src/duckdb/src/include/duckdb/main/relation.hpp +6 -7
  91. package/src/duckdb/src/include/duckdb/optimizer/optimizer_extension.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/parser/column_list.hpp +7 -7
  93. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +4 -7
  94. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_macro_info.hpp +8 -12
  95. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_sequence_info.hpp +6 -20
  96. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +6 -18
  97. package/src/duckdb/src/include/duckdb/parser/parsed_data/detach_info.hpp +4 -8
  98. package/src/duckdb/src/include/duckdb/parser/parsed_data/drop_info.hpp +4 -38
  99. package/src/duckdb/src/include/duckdb/parser/parsed_data/transaction_info.hpp +5 -2
  100. package/src/duckdb/src/include/duckdb/parser/parsed_data/vacuum_info.hpp +10 -10
  101. package/src/duckdb/src/include/duckdb/parser/parser_extension.hpp +2 -2
  102. package/src/duckdb/src/include/duckdb/parser/sql_statement.hpp +1 -1
  103. package/src/duckdb/src/include/duckdb/parser/statement/select_statement.hpp +1 -1
  104. package/src/duckdb/src/include/duckdb/planner/operator_extension.hpp +2 -2
  105. package/src/duckdb/src/include/duckdb/storage/storage_extension.hpp +2 -2
  106. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +42 -0
  107. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +0 -7
  108. package/src/duckdb/src/parser/parsed_data/create_info.cpp +19 -8
  109. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +46 -0
  110. package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +56 -0
  111. package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +47 -0
  112. package/src/duckdb/src/parser/parsed_data/detach_info.cpp +34 -0
  113. package/src/duckdb/src/parser/parsed_data/drop_info.cpp +46 -0
  114. package/src/duckdb/src/parser/parsed_data/transaction_info.cpp +24 -0
  115. package/src/duckdb/src/parser/parsed_data/vacuum_info.cpp +37 -0
  116. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +27 -9
  117. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +9 -4
  118. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +2 -1
  119. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +1 -0
  120. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +1 -1
  121. package/src/duckdb/src/planner/logical_operator.cpp +1 -2
  122. package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -25
  123. package/src/duckdb/src/planner/operator/logical_insert.cpp +30 -0
  124. package/src/duckdb/src/planner/operator/logical_simple.cpp +33 -5
  125. package/src/duckdb/src/planner/parsed_data/bound_create_table_info.cpp +6 -16
  126. package/src/duckdb/src/planner/planner.cpp +4 -13
  127. package/src/duckdb/src/storage/checkpoint_manager.cpp +12 -6
  128. package/src/duckdb/src/storage/single_file_block_manager.cpp +0 -4
  129. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  130. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +5735 -5773
  131. package/src/duckdb/ub_src_catalog_catalog_entry.cpp +1 -1
  132. package/src/duckdb/ub_src_parser_parsed_data.cpp +16 -0
  133. package/src/duckdb/src/catalog/catalog_entry/scalar_macro_catalog_entry.cpp +0 -104
@@ -724,6 +724,11 @@ unique_ptr<TableRef> ParquetScanReplacement(ClientContext &context, const string
724
724
  vector<unique_ptr<ParsedExpression>> children;
725
725
  children.push_back(make_uniq<ConstantExpression>(Value(table_name)));
726
726
  table_function->function = make_uniq<FunctionExpression>("parquet_scan", std::move(children));
727
+
728
+ if (!FileSystem::HasGlob(table_name)) {
729
+ table_function->alias = FileSystem::ExtractBaseName(table_name);
730
+ }
731
+
727
732
  return std::move(table_function);
728
733
  }
729
734
 
@@ -0,0 +1,42 @@
1
+ #include "duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp"
2
+ #include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp"
3
+ #include "duckdb/common/field_writer.hpp"
4
+ #include "duckdb/function/scalar_macro_function.hpp"
5
+
6
+ namespace duckdb {
7
+
8
+ MacroCatalogEntry::MacroCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateMacroInfo &info)
9
+ : FunctionEntry(
10
+ (info.function->type == MacroType::SCALAR_MACRO ? CatalogType::MACRO_ENTRY : CatalogType::TABLE_MACRO_ENTRY),
11
+ catalog, schema, info),
12
+ function(std::move(info.function)) {
13
+ this->temporary = info.temporary;
14
+ this->internal = info.internal;
15
+ }
16
+
17
+ ScalarMacroCatalogEntry::ScalarMacroCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateMacroInfo &info)
18
+ : MacroCatalogEntry(catalog, schema, info) {
19
+ }
20
+
21
+ TableMacroCatalogEntry::TableMacroCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateMacroInfo &info)
22
+ : MacroCatalogEntry(catalog, schema, info) {
23
+ }
24
+
25
+ unique_ptr<CreateMacroInfo> MacroCatalogEntry::GetInfoForSerialization() const {
26
+ auto info = make_uniq<CreateMacroInfo>(type);
27
+ info->catalog = catalog.GetName();
28
+ info->schema = schema.name;
29
+ info->name = name;
30
+ info->function = function->Copy();
31
+ return info;
32
+ }
33
+ void MacroCatalogEntry::Serialize(Serializer &serializer) const {
34
+ auto info = GetInfoForSerialization();
35
+ info->Serialize(serializer);
36
+ }
37
+
38
+ unique_ptr<CreateMacroInfo> MacroCatalogEntry::Deserialize(Deserializer &main_source, ClientContext &context) {
39
+ return unique_ptr_cast<CreateInfo, CreateMacroInfo>(CreateInfo::Deserialize(main_source));
40
+ }
41
+
42
+ } // namespace duckdb
@@ -145,6 +145,11 @@ void CatalogSearchPath::Set(vector<CatalogSearchEntry> new_paths, bool is_set_sc
145
145
  is_set_schema ? "schema" : "search_path", path.ToString());
146
146
  }
147
147
  }
148
+ if (is_set_schema) {
149
+ if (new_paths[0].catalog == TEMP_CATALOG || new_paths[0].catalog == SYSTEM_CATALOG) {
150
+ throw CatalogException("SET schema cannot be set to internal schema \"%s\"", new_paths[0].catalog);
151
+ }
152
+ }
148
153
  this->set_paths = std::move(new_paths);
149
154
  SetPaths(set_paths);
150
155
  }
@@ -84,7 +84,7 @@ bool CatalogSet::CreateEntry(CatalogTransaction transaction, const string &name,
84
84
  throw InternalException("Attempting to create temporary entry \"%s\" in non-temporary catalog", name);
85
85
  }
86
86
  if (!value->temporary && catalog.IsTemporaryCatalog() && name != DEFAULT_SCHEMA) {
87
- throw InternalException("Attempting to create non-temporary entry \"%s\" in temporary catalog", name);
87
+ throw InvalidInputException("Cannot create non-temporary entry \"%s\" in temporary catalog", name);
88
88
  }
89
89
  }
90
90
  // lock the catalog for writing
@@ -2,6 +2,7 @@
2
2
 
3
3
  #include "duckdb/common/limits.hpp"
4
4
  #include "duckdb/common/vector_size.hpp"
5
+ #include "duckdb/common/serializer.hpp"
5
6
 
6
7
  namespace duckdb {
7
8
 
@@ -305,6 +305,20 @@ void FileSystem::FileSync(FileHandle &handle) {
305
305
  throw NotImplementedException("%s: FileSync is not implemented!", GetName());
306
306
  }
307
307
 
308
+ bool FileSystem::HasGlob(const string &str) {
309
+ for (idx_t i = 0; i < str.size(); i++) {
310
+ switch (str[i]) {
311
+ case '*':
312
+ case '?':
313
+ case '[':
314
+ return true;
315
+ default:
316
+ break;
317
+ }
318
+ }
319
+ return false;
320
+ }
321
+
308
322
  vector<string> FileSystem::Glob(const string &path, FileOpener *opener) {
309
323
  throw NotImplementedException("%s: Glob is not implemented!", GetName());
310
324
  }
@@ -333,12 +347,8 @@ vector<string> FileSystem::GlobFiles(const string &pattern, ClientContext &conte
333
347
  auto result = Glob(pattern);
334
348
  if (result.empty()) {
335
349
  string required_extension;
336
- const string prefixes[] = {"http://", "https://", "s3://"};
337
- for (auto &prefix : prefixes) {
338
- if (StringUtil::StartsWith(pattern, prefix)) {
339
- required_extension = "httpfs";
340
- break;
341
- }
350
+ if (FileSystem::IsRemoteFile(pattern)) {
351
+ required_extension = "httpfs";
342
352
  }
343
353
  if (!required_extension.empty() && !context.db->ExtensionIsLoaded(required_extension)) {
344
354
  // an extension is required to read this file but it is not loaded - try to load it
@@ -455,4 +465,14 @@ FileType FileHandle::GetType() {
455
465
  return file_system.GetFileType(*this);
456
466
  }
457
467
 
468
+ bool FileSystem::IsRemoteFile(const string &path) {
469
+ const string prefixes[] = {"http://", "https://", "s3://"};
470
+ for (auto &prefix : prefixes) {
471
+ if (StringUtil::StartsWith(path, prefix)) {
472
+ return true;
473
+ }
474
+ }
475
+ return false;
476
+ }
477
+
458
478
  } // namespace duckdb
@@ -819,19 +819,6 @@ idx_t LocalFileSystem::SeekPosition(FileHandle &handle) {
819
819
  return GetFilePointer(handle);
820
820
  }
821
821
 
822
- static bool HasGlob(const string &str) {
823
- for (idx_t i = 0; i < str.size(); i++) {
824
- switch (str[i]) {
825
- case '*':
826
- case '?':
827
- case '[':
828
- return true;
829
- default:
830
- break;
831
- }
832
- }
833
- return false;
834
- }
835
822
  static bool IsCrawl(const string &glob) {
836
823
  // glob must match exactly
837
824
  return glob == "**";
@@ -1868,7 +1868,7 @@ idx_t ListVector::GetListSize(const Vector &vec) {
1868
1868
  return ListVector::GetListSize(child);
1869
1869
  }
1870
1870
  D_ASSERT(vec.auxiliary);
1871
- return ((VectorListBuffer &)*vec.auxiliary).size;
1871
+ return ((VectorListBuffer &)*vec.auxiliary).GetSize();
1872
1872
  }
1873
1873
 
1874
1874
  idx_t ListVector::GetListCapacity(const Vector &vec) {
@@ -1877,7 +1877,7 @@ idx_t ListVector::GetListCapacity(const Vector &vec) {
1877
1877
  return ListVector::GetListSize(child);
1878
1878
  }
1879
1879
  D_ASSERT(vec.auxiliary);
1880
- return ((VectorListBuffer &)*vec.auxiliary).capacity;
1880
+ return ((VectorListBuffer &)*vec.auxiliary).GetCapacity();
1881
1881
  }
1882
1882
 
1883
1883
  void ListVector::ReferenceEntry(Vector &vector, Vector &other) {
@@ -1894,7 +1894,7 @@ void ListVector::SetListSize(Vector &vec, idx_t size) {
1894
1894
  auto &child = DictionaryVector::Child(vec);
1895
1895
  ListVector::SetListSize(child, size);
1896
1896
  }
1897
- ((VectorListBuffer &)*vec.auxiliary).size = size;
1897
+ ((VectorListBuffer &)*vec.auxiliary).SetSize(size);
1898
1898
  }
1899
1899
 
1900
1900
  void ListVector::Append(Vector &target, const Vector &source, idx_t source_size, idx_t source_offset) {
@@ -58,12 +58,12 @@ VectorStructBuffer::~VectorStructBuffer() {
58
58
  }
59
59
 
60
60
  VectorListBuffer::VectorListBuffer(unique_ptr<Vector> vector, idx_t initial_capacity)
61
- : VectorBuffer(VectorBufferType::LIST_BUFFER), capacity(initial_capacity), child(std::move(vector)) {
61
+ : VectorBuffer(VectorBufferType::LIST_BUFFER), child(std::move(vector)), capacity(initial_capacity) {
62
62
  }
63
63
 
64
64
  VectorListBuffer::VectorListBuffer(const LogicalType &list_type, idx_t initial_capacity)
65
- : VectorBuffer(VectorBufferType::LIST_BUFFER), capacity(initial_capacity),
66
- child(make_uniq<Vector>(ListType::GetChildType(list_type), initial_capacity)) {
65
+ : VectorBuffer(VectorBufferType::LIST_BUFFER),
66
+ child(make_uniq<Vector>(ListType::GetChildType(list_type), initial_capacity)), capacity(initial_capacity) {
67
67
  }
68
68
 
69
69
  void VectorListBuffer::Reserve(idx_t to_reserve) {
@@ -96,6 +96,14 @@ void VectorListBuffer::PushBack(const Value &insert) {
96
96
  child->SetValue(size++, insert);
97
97
  }
98
98
 
99
+ void VectorListBuffer::SetCapacity(idx_t new_capacity) {
100
+ this->capacity = new_capacity;
101
+ }
102
+
103
+ void VectorListBuffer::SetSize(idx_t new_size) {
104
+ this->size = new_size;
105
+ }
106
+
99
107
  VectorListBuffer::~VectorListBuffer() {
100
108
  }
101
109
 
@@ -16,7 +16,7 @@ public:
16
16
  owned_data = allocator.Allocate(capacity * GetTypeIdSize(internal_type));
17
17
  // child data of the list
18
18
  auto &child_type = ListType::GetChildType(type);
19
- child_caches.push_back(make_buffer<VectorCacheBuffer>(allocator, child_type));
19
+ child_caches.push_back(make_buffer<VectorCacheBuffer>(allocator, child_type, capacity));
20
20
  auto child_vector = make_uniq<Vector>(child_type, false, false);
21
21
  auxiliary = make_shared<VectorListBuffer>(std::move(child_vector));
22
22
  break;
@@ -24,7 +24,7 @@ public:
24
24
  case PhysicalType::STRUCT: {
25
25
  auto &child_types = StructType::GetChildTypes(type);
26
26
  for (auto &child_type : child_types) {
27
- child_caches.push_back(make_buffer<VectorCacheBuffer>(allocator, child_type.second));
27
+ child_caches.push_back(make_buffer<VectorCacheBuffer>(allocator, child_type.second, capacity));
28
28
  }
29
29
  auto struct_buffer = make_shared<VectorStructBuffer>(type);
30
30
  auxiliary = std::move(struct_buffer);
@@ -48,13 +48,13 @@ public:
48
48
  // reinitialize the VectorListBuffer
49
49
  AssignSharedPointer(result.auxiliary, auxiliary);
50
50
  // propagate through child
51
+ auto &child_cache = (VectorCacheBuffer &)*child_caches[0];
51
52
  auto &list_buffer = (VectorListBuffer &)*result.auxiliary;
52
- list_buffer.capacity = capacity;
53
- list_buffer.size = 0;
53
+ list_buffer.SetCapacity(child_cache.capacity);
54
+ list_buffer.SetSize(0);
54
55
  list_buffer.SetAuxiliaryData(nullptr);
55
56
 
56
57
  auto &list_child = list_buffer.GetChild();
57
- auto &child_cache = (VectorCacheBuffer &)*child_caches[0];
58
58
  child_cache.ResetFromCache(list_child, child_caches[0]);
59
59
  break;
60
60
  }
@@ -1172,6 +1172,16 @@ void BufferedCSVReader::SkipEmptyLines() {
1172
1172
  }
1173
1173
  }
1174
1174
 
1175
+ void UpdateMaxLineLength(ClientContext &context, idx_t line_length) {
1176
+ if (!context.client_data->debug_set_max_line_length) {
1177
+ return;
1178
+ }
1179
+ if (line_length < context.client_data->debug_max_line_length) {
1180
+ return;
1181
+ }
1182
+ context.client_data->debug_max_line_length = line_length;
1183
+ }
1184
+
1175
1185
  bool BufferedCSVReader::TryParseSimpleCSV(DataChunk &insert_chunk, string &error_message) {
1176
1186
  // used for parsing algorithm
1177
1187
  bool finished_chunk = false;
@@ -1239,9 +1249,7 @@ add_row : {
1239
1249
  return false;
1240
1250
  }
1241
1251
  finished_chunk = AddRow(insert_chunk, column, error_message);
1242
- if (context.client_data->max_line_length < position - line_start) {
1243
- context.client_data->max_line_length = position - line_start;
1244
- }
1252
+ UpdateMaxLineLength(context, position - line_start);
1245
1253
  if (!error_message.empty()) {
1246
1254
  return false;
1247
1255
  }
@@ -1379,9 +1387,7 @@ final_state:
1379
1387
  AddValue(string_t(buffer.get() + start, position - start - offset), column, escape_positions, has_quotes);
1380
1388
  finished_chunk = AddRow(insert_chunk, column, error_message);
1381
1389
  SkipEmptyLines();
1382
- if (context.client_data->max_line_length < position - line_start) {
1383
- context.client_data->max_line_length = position - line_start;
1384
- }
1390
+ UpdateMaxLineLength(context, position - line_start);
1385
1391
  if (!error_message.empty()) {
1386
1392
  return false;
1387
1393
  }
@@ -183,6 +183,12 @@ void BufferedCSVReaderOptions::SetReadOption(const string &loption, const Value
183
183
  }
184
184
 
185
185
  void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value &value) {
186
+ if (loption == "new_line") {
187
+ // Steal this from SetBaseOption so we can write different newlines (e.g., format JSON ARRAY)
188
+ write_newline = ParseString(value, loption);
189
+ return;
190
+ }
191
+
186
192
  if (SetBaseOption(loption, value)) {
187
193
  return;
188
194
  }
@@ -199,6 +205,10 @@ void BufferedCSVReaderOptions::SetWriteOption(const string &loption, const Value
199
205
  }
200
206
  SetDateFormat(LogicalTypeId::TIMESTAMP, format, false);
201
207
  SetDateFormat(LogicalTypeId::TIMESTAMP_TZ, format, false);
208
+ } else if (loption == "prefix") {
209
+ prefix = ParseString(value, loption);
210
+ } else if (loption == "suffix") {
211
+ suffix = ParseString(value, loption);
202
212
  } else {
203
213
  throw BinderException("Unrecognized option CSV writer \"%s\"", loption);
204
214
  }
@@ -7,9 +7,9 @@
7
7
 
8
8
  namespace duckdb {
9
9
 
10
- PhysicalCreateType::PhysicalCreateType(unique_ptr<CreateTypeInfo> info, idx_t estimated_cardinality)
10
+ PhysicalCreateType::PhysicalCreateType(unique_ptr<CreateTypeInfo> info_p, idx_t estimated_cardinality)
11
11
  : PhysicalOperator(PhysicalOperatorType::CREATE_TYPE, {LogicalType::BIGINT}, estimated_cardinality),
12
- info(std::move(info)) {
12
+ info(std::move(info_p)) {
13
13
  }
14
14
 
15
15
  //===--------------------------------------------------------------------===//
@@ -5,6 +5,7 @@
5
5
  #include "duckdb/catalog/catalog_entry/scalar_macro_catalog_entry.hpp"
6
6
  #include "duckdb/common/string_util.hpp"
7
7
  #include "duckdb/function/scalar_macro_function.hpp"
8
+ #include "duckdb/function/table_macro_function.hpp"
8
9
  #include "duckdb/parser/expression/columnref_expression.hpp"
9
10
  #include "duckdb/parser/expression/comparison_expression.hpp"
10
11
  #include "duckdb/parser/expression/function_expression.hpp"
@@ -91,4 +92,46 @@ string MacroFunction::ToSQL(const string &schema, const string &name) const {
91
92
  return StringUtil::Format("CREATE MACRO %s.%s(%s) AS ", schema, name, StringUtil::Join(param_strings, ", "));
92
93
  }
93
94
 
95
+ void MacroFunction::Serialize(Serializer &main_serializer) const {
96
+ FieldWriter writer(main_serializer);
97
+ writer.WriteField(type);
98
+ writer.WriteSerializableList(parameters);
99
+ writer.WriteField<uint32_t>((uint32_t)default_parameters.size());
100
+ auto &serializer = writer.GetSerializer();
101
+ for (auto &kv : default_parameters) {
102
+ serializer.WriteString(kv.first);
103
+ kv.second->Serialize(serializer);
104
+ }
105
+ SerializeInternal(writer);
106
+ writer.Finalize();
107
+ }
108
+
109
+ unique_ptr<MacroFunction> MacroFunction::Deserialize(Deserializer &main_source) {
110
+ FieldReader reader(main_source);
111
+ auto type = reader.ReadRequired<MacroType>();
112
+ auto parameters = reader.ReadRequiredSerializableList<ParsedExpression>();
113
+ auto default_param_count = reader.ReadRequired<uint32_t>();
114
+ unordered_map<string, unique_ptr<ParsedExpression>> default_parameters;
115
+ auto &source = reader.GetSource();
116
+ for (idx_t i = 0; i < default_param_count; i++) {
117
+ auto name = source.Read<string>();
118
+ default_parameters[name] = ParsedExpression::Deserialize(source);
119
+ }
120
+ unique_ptr<MacroFunction> result;
121
+ switch (type) {
122
+ case MacroType::SCALAR_MACRO:
123
+ result = ScalarMacroFunction::Deserialize(reader);
124
+ break;
125
+ case MacroType::TABLE_MACRO:
126
+ result = TableMacroFunction::Deserialize(reader);
127
+ break;
128
+ default:
129
+ throw InternalException("Cannot deserialize macro type");
130
+ }
131
+ result->parameters = std::move(parameters);
132
+ result->default_parameters = std::move(default_parameters);
133
+ reader.Finalize();
134
+ return result;
135
+ }
136
+
94
137
  } // namespace duckdb
@@ -50,15 +50,17 @@ string PragmaShowTables(ClientContext &context, const FunctionParameters &parame
50
50
  string PragmaShowTablesExpanded(ClientContext &context, const FunctionParameters &parameters) {
51
51
  return R"(
52
52
  SELECT
53
+ t.database_name AS database,
54
+ t.schema_name AS schema,
53
55
  t.table_name,
54
56
  LIST(c.column_name order by c.column_index) AS column_names,
55
57
  LIST(c.data_type order by c.column_index) AS column_types,
56
- FIRST(t.temporary) AS temporary
58
+ FIRST(t.temporary) AS temporary,
57
59
  FROM duckdb_tables t
58
60
  JOIN duckdb_columns c
59
61
  USING (table_oid)
60
- GROUP BY t.table_name
61
- ORDER BY t.table_name;
62
+ GROUP BY t.database_name, t.schema_name, t.table_name
63
+ ORDER BY t.database_name, t.schema_name, t.table_name;
62
64
  )";
63
65
  }
64
66
 
@@ -417,6 +417,7 @@ string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeF
417
417
  if (format_string.empty()) {
418
418
  return "Empty format string";
419
419
  }
420
+ format.format_specifier = format_string;
420
421
  format.specifiers.clear();
421
422
  format.literals.clear();
422
423
  format.numeric_width.clear();
@@ -49,4 +49,14 @@ string ScalarMacroFunction::ToSQL(const string &schema, const string &name) cons
49
49
  return MacroFunction::ToSQL(schema, name) + StringUtil::Format("(%s);", expression_copy->ToString());
50
50
  }
51
51
 
52
+ void ScalarMacroFunction::SerializeInternal(FieldWriter &writer) const {
53
+ writer.WriteSerializable(*expression);
54
+ }
55
+
56
+ unique_ptr<MacroFunction> ScalarMacroFunction::Deserialize(FieldReader &reader) {
57
+ auto result = make_uniq<ScalarMacroFunction>();
58
+ result->expression = reader.ReadRequiredSerializable<ParsedExpression>();
59
+ return std::move(result);
60
+ }
61
+
52
62
  } // namespace duckdb
@@ -1,15 +1,16 @@
1
1
  #include "duckdb/common/bind_helpers.hpp"
2
2
  #include "duckdb/common/file_system.hpp"
3
+ #include "duckdb/common/multi_file_reader.hpp"
3
4
  #include "duckdb/common/serializer/buffered_serializer.hpp"
4
5
  #include "duckdb/common/string_util.hpp"
6
+ #include "duckdb/common/types/column/column_data_collection.hpp"
5
7
  #include "duckdb/common/types/string_type.hpp"
6
8
  #include "duckdb/common/vector_operations/vector_operations.hpp"
7
9
  #include "duckdb/function/copy_function.hpp"
8
10
  #include "duckdb/function/scalar/string_functions.hpp"
9
11
  #include "duckdb/function/table/read_csv.hpp"
10
12
  #include "duckdb/parser/parsed_data/copy_info.hpp"
11
- #include "duckdb/common/multi_file_reader.hpp"
12
- #include "duckdb/common/types/column/column_data_collection.hpp"
13
+
13
14
  #include <limits>
14
15
 
15
16
  namespace duckdb {
@@ -57,6 +58,15 @@ void BaseCSVData::Finalize() {
57
58
  SubstringDetection(options.escape, options.null_str, "ESCAPE", "NULL");
58
59
  }
59
60
  }
61
+
62
+ if (!options.prefix.empty() || !options.suffix.empty()) {
63
+ if (options.prefix.empty() || options.suffix.empty()) {
64
+ throw BinderException("COPY ... (FORMAT CSV) must have both PREFIX and SUFFIX, or none at all");
65
+ }
66
+ if (options.header) {
67
+ throw BinderException("COPY ... (FORMAT CSV)'s HEADER cannot be combined with PREFIX/SUFFIX");
68
+ }
69
+ }
60
70
  }
61
71
 
62
72
  static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &info, vector<string> &names,
@@ -85,6 +95,9 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &i
85
95
  bind_data->requires_quotes[bind_data->options.delimiter[0]] = true;
86
96
  bind_data->requires_quotes[bind_data->options.quote[0]] = true;
87
97
  }
98
+ if (!bind_data->options.write_newline.empty()) {
99
+ bind_data->newline = bind_data->options.write_newline;
100
+ }
88
101
  return std::move(bind_data);
89
102
  }
90
103
 
@@ -251,24 +264,41 @@ struct LocalWriteCSVData : public LocalFunctionData {
251
264
  BufferedSerializer serializer;
252
265
  //! A chunk with VARCHAR columns to cast intermediates into
253
266
  DataChunk cast_chunk;
267
+ //! If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY
268
+ bool written_anything = false;
254
269
  };
255
270
 
256
271
  struct GlobalWriteCSVData : public GlobalFunctionData {
257
- GlobalWriteCSVData(FileSystem &fs, const string &file_path, FileCompressionType compression) : fs(fs) {
272
+ GlobalWriteCSVData(FileSystem &fs, const string &file_path, FileCompressionType compression)
273
+ : fs(fs), written_anything(false) {
258
274
  handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW,
259
275
  FileLockType::WRITE_LOCK, compression);
260
276
  }
261
277
 
278
+ //! Write generic data, e.g., CSV header
262
279
  void WriteData(const_data_ptr_t data, idx_t size) {
263
280
  lock_guard<mutex> flock(lock);
264
281
  handle->Write((void *)data, size);
265
282
  }
266
283
 
284
+ //! Write rows
285
+ void WriteRows(const_data_ptr_t data, idx_t size, const string &newline) {
286
+ lock_guard<mutex> flock(lock);
287
+ if (written_anything) {
288
+ handle->Write((void *)newline.c_str(), newline.length());
289
+ } else {
290
+ written_anything = true;
291
+ }
292
+ handle->Write((void *)data, size);
293
+ }
294
+
267
295
  FileSystem &fs;
268
296
  //! The mutex for writing to the physical file
269
297
  mutex lock;
270
298
  //! The file handle to write to
271
299
  unique_ptr<FileHandle> handle;
300
+ //! If we've written any rows yet, allows us to prevent a trailing comma when writing JSON ARRAY
301
+ bool written_anything;
272
302
  };
273
303
 
274
304
  static unique_ptr<LocalFunctionData> WriteCSVInitializeLocal(ExecutionContext &context, FunctionData &bind_data) {
@@ -290,6 +320,10 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
290
320
  auto global_data =
291
321
  make_uniq<GlobalWriteCSVData>(FileSystem::GetFileSystem(context), file_path, options.compression);
292
322
 
323
+ if (!options.prefix.empty()) {
324
+ global_data->WriteData((const_data_ptr_t)options.prefix.c_str(), options.prefix.size());
325
+ }
326
+
293
327
  if (options.header) {
294
328
  BufferedSerializer serializer;
295
329
  // write the header line to the file
@@ -304,11 +338,12 @@ static unique_ptr<GlobalFunctionData> WriteCSVInitializeGlobal(ClientContext &co
304
338
 
305
339
  global_data->WriteData(serializer.blob.data.get(), serializer.blob.size);
306
340
  }
341
+
307
342
  return std::move(global_data);
308
343
  }
309
344
 
310
345
  static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_data, DataChunk &cast_chunk,
311
- BufferedSerializer &writer, DataChunk &input) {
346
+ BufferedSerializer &writer, DataChunk &input, bool &written_anything) {
312
347
  auto &csv_data = bind_data.Cast<WriteCSVData>();
313
348
  auto &options = csv_data.options;
314
349
 
@@ -324,15 +359,11 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
324
359
  csv_data.options.write_date_format[LogicalTypeId::DATE].ConvertDateVector(
325
360
  input.data[col_idx], cast_chunk.data[col_idx], input.size());
326
361
  } else if (options.has_format[LogicalTypeId::TIMESTAMP] &&
327
- csv_data.sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP) {
362
+ (csv_data.sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP ||
363
+ csv_data.sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP_TZ)) {
328
364
  // use the timestamp format to cast the chunk
329
365
  csv_data.options.write_date_format[LogicalTypeId::TIMESTAMP].ConvertTimestampVector(
330
366
  input.data[col_idx], cast_chunk.data[col_idx], input.size());
331
- } else if (options.has_format[LogicalTypeId::TIMESTAMP_TZ] &&
332
- csv_data.sql_types[col_idx].id() == LogicalTypeId::TIMESTAMP_TZ) {
333
- // use the timestamp format to cast the chunk
334
- csv_data.options.write_date_format[LogicalTypeId::TIMESTAMP_TZ].ConvertTimestampVector(
335
- input.data[col_idx], cast_chunk.data[col_idx], input.size());
336
367
  } else {
337
368
  // non varchar column, perform the cast
338
369
  VectorOperations::Cast(context, input.data[col_idx], cast_chunk.data[col_idx], input.size());
@@ -342,6 +373,11 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
342
373
  cast_chunk.Flatten();
343
374
  // now loop over the vectors and output the values
344
375
  for (idx_t row_idx = 0; row_idx < cast_chunk.size(); row_idx++) {
376
+ if (row_idx == 0 && !written_anything) {
377
+ written_anything = true;
378
+ } else {
379
+ writer.WriteBufferData(csv_data.newline);
380
+ }
345
381
  // write values
346
382
  for (idx_t col_idx = 0; col_idx < cast_chunk.ColumnCount(); col_idx++) {
347
383
  if (col_idx != 0) {
@@ -361,7 +397,6 @@ static void WriteCSVChunkInternal(ClientContext &context, FunctionData &bind_dat
361
397
  WriteQuotedString(writer, csv_data, str_data[row_idx].GetData(), str_data[row_idx].GetSize(),
362
398
  csv_data.options.force_quote[col_idx]);
363
399
  }
364
- writer.WriteBufferData(csv_data.newline);
365
400
  }
366
401
  }
367
402
 
@@ -372,13 +407,15 @@ static void WriteCSVSink(ExecutionContext &context, FunctionData &bind_data, Glo
372
407
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
373
408
 
374
409
  // write data into the local buffer
375
- WriteCSVChunkInternal(context.client, bind_data, local_data.cast_chunk, local_data.serializer, input);
410
+ WriteCSVChunkInternal(context.client, bind_data, local_data.cast_chunk, local_data.serializer, input,
411
+ local_data.written_anything);
376
412
 
377
413
  // check if we should flush what we have currently written
378
414
  auto &writer = local_data.serializer;
379
415
  if (writer.blob.size >= csv_data.flush_size) {
380
- global_state.WriteData(writer.blob.data.get(), writer.blob.size);
416
+ global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
381
417
  writer.Reset();
418
+ local_data.written_anything = false;
382
419
  }
383
420
  }
384
421
 
@@ -389,10 +426,11 @@ static void WriteCSVCombine(ExecutionContext &context, FunctionData &bind_data,
389
426
  LocalFunctionData &lstate) {
390
427
  auto &local_data = lstate.Cast<LocalWriteCSVData>();
391
428
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
429
+ auto &csv_data = bind_data.Cast<WriteCSVData>();
392
430
  auto &writer = local_data.serializer;
393
431
  // flush the local writer
394
- if (writer.blob.size > 0) {
395
- global_state.WriteData(writer.blob.data.get(), writer.blob.size);
432
+ if (local_data.written_anything) {
433
+ global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
396
434
  writer.Reset();
397
435
  }
398
436
  }
@@ -402,6 +440,16 @@ static void WriteCSVCombine(ExecutionContext &context, FunctionData &bind_data,
402
440
  //===--------------------------------------------------------------------===//
403
441
  void WriteCSVFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) {
404
442
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
443
+ auto &csv_data = bind_data.Cast<WriteCSVData>();
444
+ auto &options = csv_data.options;
445
+
446
+ BufferedSerializer serializer;
447
+ if (!options.suffix.empty()) {
448
+ serializer.WriteBufferData(options.suffix);
449
+ } else if (global_state.written_anything) {
450
+ serializer.WriteBufferData(csv_data.newline);
451
+ }
452
+ global_state.WriteData(serializer.blob.data.get(), serializer.blob.size);
405
453
 
406
454
  global_state.handle->Close();
407
455
  global_state.handle.reset();
@@ -438,10 +486,11 @@ unique_ptr<PreparedBatchData> WriteCSVPrepareBatch(ClientContext &context, Funct
438
486
  DataChunk cast_chunk;
439
487
  cast_chunk.Initialize(Allocator::Get(context), types);
440
488
 
441
- auto batch = make_uniq<WriteCSVBatchData>();
442
489
  // write CSV chunks to the batch data
490
+ bool written_anything = false;
491
+ auto batch = make_uniq<WriteCSVBatchData>();
443
492
  for (auto &chunk : collection->Chunks()) {
444
- WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->serializer, chunk);
493
+ WriteCSVChunkInternal(context, bind_data, cast_chunk, batch->serializer, chunk, written_anything);
445
494
  }
446
495
  return std::move(batch);
447
496
  }
@@ -453,8 +502,9 @@ void WriteCSVFlushBatch(ClientContext &context, FunctionData &bind_data, GlobalF
453
502
  PreparedBatchData &batch) {
454
503
  auto &csv_batch = batch.Cast<WriteCSVBatchData>();
455
504
  auto &global_state = gstate.Cast<GlobalWriteCSVData>();
505
+ auto &csv_data = bind_data.Cast<WriteCSVData>();
456
506
  auto &writer = csv_batch.serializer;
457
- global_state.WriteData(writer.blob.data.get(), writer.blob.size);
507
+ global_state.WriteRows(writer.blob.data.get(), writer.blob.size, csv_data.newline);
458
508
  writer.Reset();
459
509
  }
460
510