duckdb 1.3.1-dev6.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/package.json +1 -1
  2. package/src/duckdb/extension/core_functions/aggregate/distributive/arg_min_max.cpp +27 -39
  3. package/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp +2 -3
  4. package/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_sort_tree.hpp +1 -1
  5. package/src/duckdb/extension/core_functions/lambda_functions.cpp +16 -14
  6. package/src/duckdb/extension/core_functions/scalar/list/list_filter.cpp +3 -2
  7. package/src/duckdb/extension/core_functions/scalar/list/list_reduce.cpp +46 -10
  8. package/src/duckdb/extension/core_functions/scalar/list/list_transform.cpp +3 -2
  9. package/src/duckdb/extension/core_functions/scalar/random/random.cpp +3 -1
  10. package/src/duckdb/extension/icu/icu-datefunc.cpp +5 -3
  11. package/src/duckdb/extension/icu/icu-strptime.cpp +6 -1
  12. package/src/duckdb/extension/icu/icu-timezone.cpp +4 -0
  13. package/src/duckdb/extension/icu/icu_extension.cpp +7 -2
  14. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +1 -1
  15. package/src/duckdb/extension/icu/include/icu-helpers.hpp +1 -1
  16. package/src/duckdb/extension/icu/third_party/icu/common/uloc.cpp +5 -5
  17. package/src/duckdb/extension/json/include/json_common.hpp +19 -0
  18. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -4
  19. package/src/duckdb/extension/json/include/json_functions.hpp +4 -4
  20. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +38 -17
  21. package/src/duckdb/extension/json/json_functions/json_table_in_out.cpp +11 -7
  22. package/src/duckdb/extension/json/json_functions.cpp +4 -4
  23. package/src/duckdb/extension/json/json_reader.cpp +1 -1
  24. package/src/duckdb/extension/parquet/column_reader.cpp +7 -1
  25. package/src/duckdb/extension/parquet/include/parquet_bss_decoder.hpp +2 -2
  26. package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +2 -2
  27. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +2 -1
  28. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +1 -1
  29. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +3 -0
  30. package/src/duckdb/extension/parquet/include/writer/parquet_write_operators.hpp +3 -1
  31. package/src/duckdb/extension/parquet/include/writer/templated_column_writer.hpp +1 -1
  32. package/src/duckdb/extension/parquet/parquet_crypto.cpp +9 -5
  33. package/src/duckdb/extension/parquet/parquet_extension.cpp +26 -0
  34. package/src/duckdb/extension/parquet/parquet_float16.cpp +4 -2
  35. package/src/duckdb/extension/parquet/parquet_metadata.cpp +3 -3
  36. package/src/duckdb/extension/parquet/parquet_multi_file_info.cpp +12 -0
  37. package/src/duckdb/extension/parquet/parquet_reader.cpp +5 -4
  38. package/src/duckdb/extension/parquet/parquet_statistics.cpp +13 -3
  39. package/src/duckdb/extension/parquet/parquet_writer.cpp +1 -1
  40. package/src/duckdb/extension/parquet/reader/decimal_column_reader.cpp +1 -1
  41. package/src/duckdb/extension/parquet/reader/string_column_reader.cpp +1 -1
  42. package/src/duckdb/extension/parquet/reader/struct_column_reader.cpp +13 -4
  43. package/src/duckdb/extension/parquet/serialize_parquet.cpp +2 -0
  44. package/src/duckdb/src/catalog/catalog.cpp +10 -4
  45. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +4 -10
  46. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +1 -2
  47. package/src/duckdb/src/catalog/catalog_entry/sequence_catalog_entry.cpp +1 -1
  48. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +2 -2
  49. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +1 -1
  50. package/src/duckdb/src/catalog/catalog_search_path.cpp +7 -1
  51. package/src/duckdb/src/catalog/catalog_set.cpp +21 -1
  52. package/src/duckdb/src/common/adbc/adbc.cpp +1 -1
  53. package/src/duckdb/src/common/arrow/arrow_appender.cpp +17 -5
  54. package/src/duckdb/src/common/arrow/arrow_converter.cpp +23 -15
  55. package/src/duckdb/src/common/box_renderer.cpp +1 -2
  56. package/src/duckdb/src/common/enum_util.cpp +4 -3
  57. package/src/duckdb/src/common/local_file_system.cpp +13 -12
  58. package/src/duckdb/src/common/multi_file/multi_file_column_mapper.cpp +35 -12
  59. package/src/duckdb/src/common/multi_file/multi_file_reader.cpp +13 -3
  60. package/src/duckdb/src/common/string_util.cpp +7 -5
  61. package/src/duckdb/src/common/tree_renderer/graphviz_tree_renderer.cpp +4 -4
  62. package/src/duckdb/src/common/tree_renderer/html_tree_renderer.cpp +4 -4
  63. package/src/duckdb/src/common/tree_renderer/json_tree_renderer.cpp +4 -4
  64. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +4 -4
  65. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +1 -1
  66. package/src/duckdb/src/common/types/uuid.cpp +5 -1
  67. package/src/duckdb/src/common/types.cpp +28 -0
  68. package/src/duckdb/src/common/virtual_file_system.cpp +5 -0
  69. package/src/duckdb/src/execution/column_binding_resolver.cpp +49 -30
  70. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +4 -0
  71. package/src/duckdb/src/execution/join_hashtable.cpp +10 -7
  72. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
  73. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +1 -1
  74. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +2 -1
  75. package/src/duckdb/src/execution/operator/csv_scanner/scanner/skip_scanner.cpp +1 -4
  76. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +53 -1
  77. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +58 -59
  78. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +10 -5
  79. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +4 -0
  80. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +18 -8
  81. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +1 -1
  82. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +1 -0
  83. package/src/duckdb/src/execution/physical_plan_generator.cpp +5 -5
  84. package/src/duckdb/src/function/cast/vector_cast_helpers.cpp +2 -1
  85. package/src/duckdb/src/function/function.cpp +4 -0
  86. package/src/duckdb/src/function/scalar/operator/arithmetic.cpp +6 -0
  87. package/src/duckdb/src/function/scalar/struct/remap_struct.cpp +10 -1
  88. package/src/duckdb/src/function/table/copy_csv.cpp +1 -0
  89. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  90. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +1 -0
  91. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +1 -1
  92. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +1 -1
  93. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +2 -0
  94. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +2 -2
  95. package/src/duckdb/src/include/duckdb/common/helper.hpp +9 -9
  96. package/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +1 -1
  97. package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_column_mapper.hpp +3 -5
  98. package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_reader.hpp +7 -0
  99. package/src/duckdb/src/include/duckdb/common/multi_file/multi_file_states.hpp +3 -0
  100. package/src/duckdb/src/include/duckdb/common/shadow_forbidden_functions.hpp +40 -0
  101. package/src/duckdb/src/include/duckdb/common/string.hpp +25 -2
  102. package/src/duckdb/src/include/duckdb/common/types/hugeint.hpp +20 -24
  103. package/src/duckdb/src/include/duckdb/common/types/uhugeint.hpp +20 -24
  104. package/src/duckdb/src/include/duckdb/common/types.hpp +3 -0
  105. package/src/duckdb/src/include/duckdb/common/unique_ptr.hpp +34 -8
  106. package/src/duckdb/src/include/duckdb/execution/column_binding_resolver.hpp +1 -0
  107. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +3 -2
  108. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/column_count_scanner.hpp +3 -0
  109. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +1 -1
  110. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +15 -3
  111. package/src/duckdb/src/include/duckdb/function/cast/vector_cast_helpers.hpp +2 -2
  112. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +7 -3
  113. package/src/duckdb/src/include/duckdb/function/function.hpp +1 -0
  114. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +2 -1
  115. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +20 -12
  116. package/src/duckdb/src/include/duckdb/function/lambda_functions.hpp +4 -3
  117. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +3 -1
  118. package/src/duckdb/src/include/duckdb/logging/log_type.hpp +17 -0
  119. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +1 -0
  120. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +22 -6
  121. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  122. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +4 -1
  123. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +27 -13
  124. package/src/duckdb/src/include/duckdb/main/secret/secret_manager.hpp +1 -0
  125. package/src/duckdb/src/include/duckdb/main/settings.hpp +11 -0
  126. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +7 -1
  127. package/src/duckdb/src/include/duckdb/original/std/locale.hpp +10 -0
  128. package/src/duckdb/src/include/duckdb/original/std/memory.hpp +12 -0
  129. package/src/duckdb/src/include/duckdb/original/std/sstream.hpp +11 -0
  130. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +5 -3
  131. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +4 -2
  132. package/src/duckdb/src/logging/log_manager.cpp +1 -0
  133. package/src/duckdb/src/logging/log_types.cpp +40 -0
  134. package/src/duckdb/src/main/attached_database.cpp +4 -0
  135. package/src/duckdb/src/main/client_context.cpp +1 -0
  136. package/src/duckdb/src/main/config.cpp +1 -0
  137. package/src/duckdb/src/main/database.cpp +1 -0
  138. package/src/duckdb/src/main/database_manager.cpp +19 -2
  139. package/src/duckdb/src/main/extension/extension_helper.cpp +4 -3
  140. package/src/duckdb/src/main/query_profiler.cpp +2 -2
  141. package/src/duckdb/src/main/query_result.cpp +1 -1
  142. package/src/duckdb/src/main/secret/secret_manager.cpp +2 -0
  143. package/src/duckdb/src/main/settings/autogenerated_settings.cpp +7 -0
  144. package/src/duckdb/src/main/settings/custom_settings.cpp +106 -34
  145. package/src/duckdb/src/optimizer/optimizer.cpp +1 -1
  146. package/src/duckdb/src/optimizer/topn_optimizer.cpp +18 -8
  147. package/src/duckdb/src/parallel/executor.cpp +5 -0
  148. package/src/duckdb/src/parser/parsed_data/create_sequence_info.cpp +1 -1
  149. package/src/duckdb/src/parser/transform/expression/transform_interval.cpp +5 -1
  150. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +21 -24
  151. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -8
  152. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +3 -2
  153. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +0 -4
  154. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +3 -0
  155. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +3 -0
  156. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +3 -0
  157. package/src/duckdb/src/planner/expression/bound_columnref_expression.cpp +1 -1
  158. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +0 -1
  159. package/src/duckdb/src/planner/expression/bound_reference_expression.cpp +1 -1
  160. package/src/duckdb/src/planner/expression_binder.cpp +4 -2
  161. package/src/duckdb/src/planner/logical_operator.cpp +2 -1
  162. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +4 -1
  163. package/src/duckdb/src/storage/buffer/block_handle.cpp +8 -0
  164. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +44 -18
  165. package/src/duckdb/src/storage/caching_file_system.cpp +7 -7
  166. package/src/duckdb/src/storage/standard_buffer_manager.cpp +4 -3
  167. package/src/duckdb/src/storage/storage_info.cpp +2 -0
  168. package/src/duckdb/src/storage/wal_replay.cpp +9 -4
  169. package/src/duckdb/third_party/fmt/include/fmt/format.h +8 -1
  170. package/src/duckdb/third_party/fsst/libfsst.cpp +4 -3
  171. package/src/duckdb/third_party/httplib/httplib.hpp +25 -22
  172. package/src/duckdb/third_party/hyperloglog/sds.cpp +7 -3
  173. package/src/duckdb/third_party/libpg_query/src_common_keywords.cpp +8 -1
  174. package/src/duckdb/third_party/re2/re2/filtered_re2.h +8 -2
  175. package/src/duckdb/third_party/re2/re2/pod_array.h +7 -1
  176. package/src/duckdb/third_party/re2/re2/re2.cc +6 -2
  177. package/src/duckdb/third_party/re2/re2/set.cc +1 -1
  178. package/src/duckdb/third_party/re2/re2/set.h +7 -1
  179. package/src/duckdb/ub_src_logging.cpp +4 -4
@@ -105,7 +105,9 @@ public:
105
105
  DUCKDB_API void ScanWithPrefix(CatalogTransaction transaction, const std::function<void(CatalogEntry &)> &callback,
106
106
  const string &prefix);
107
107
  DUCKDB_API void Scan(CatalogTransaction transaction, const std::function<void(CatalogEntry &)> &callback);
108
+ DUCKDB_API void ScanWithReturn(CatalogTransaction transaction, const std::function<bool(CatalogEntry &)> &callback);
108
109
  DUCKDB_API void Scan(ClientContext &context, const std::function<void(CatalogEntry &)> &callback);
110
+ DUCKDB_API void ScanWithReturn(ClientContext &context, const std::function<bool(CatalogEntry &)> &callback);
109
111
 
110
112
  template <class T>
111
113
  vector<reference<T>> GetEntries(CatalogTransaction transaction) {
@@ -16,9 +16,9 @@ class Allocator;
16
16
  class BlockManager;
17
17
  struct FileHandle;
18
18
 
19
- enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2, TINY_BUFFER = 3 };
19
+ enum class FileBufferType : uint8_t { BLOCK = 1, MANAGED_BUFFER = 2, TINY_BUFFER = 3, EXTERNAL_FILE = 4 };
20
20
 
21
- static constexpr idx_t FILE_BUFFER_TYPE_COUNT = 3;
21
+ static constexpr idx_t FILE_BUFFER_TYPE_COUNT = 4;
22
22
 
23
23
  //! The FileBuffer represents a buffer that can be read or written to a Direct IO FileHandle.
24
24
  class FileBuffer {
@@ -70,7 +70,7 @@ inline
70
70
  shared_ptr<DATA_TYPE>
71
71
  make_shared_ptr(ARGS&&... args) // NOLINT: mimic std style
72
72
  {
73
- return shared_ptr<DATA_TYPE>(std::make_shared<DATA_TYPE>(std::forward<ARGS>(args)...));
73
+ return shared_ptr<DATA_TYPE>(duckdb_base_std::make_shared<DATA_TYPE>(std::forward<ARGS>(args)...));
74
74
  }
75
75
 
76
76
  template<class DATA_TYPE, class... ARGS>
@@ -82,31 +82,31 @@ make_unsafe_uniq(ARGS&&... args) // NOLINT: mimic std style
82
82
  }
83
83
 
84
84
  template<class DATA_TYPE>
85
- inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, true>
85
+ inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, true>
86
86
  make_uniq_array(size_t n) // NOLINT: mimic std style
87
87
  {
88
- return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, true>(new DATA_TYPE[n]());
88
+ return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, true>(new DATA_TYPE[n]());
89
89
  }
90
90
 
91
91
  template<class DATA_TYPE>
92
- inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, true>
92
+ inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, true>
93
93
  make_uniq_array_uninitialized(size_t n) // NOLINT: mimic std style
94
94
  {
95
- return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, true>(new DATA_TYPE[n]);
95
+ return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, true>(new DATA_TYPE[n]);
96
96
  }
97
97
 
98
98
  template<class DATA_TYPE>
99
- inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, false>
99
+ inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, false>
100
100
  make_unsafe_uniq_array(size_t n) // NOLINT: mimic std style
101
101
  {
102
- return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, false>(new DATA_TYPE[n]());
102
+ return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, false>(new DATA_TYPE[n]());
103
103
  }
104
104
 
105
105
  template<class DATA_TYPE>
106
- inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, false>
106
+ inline unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, false>
107
107
  make_unsafe_uniq_array_uninitialized(size_t n) // NOLINT: mimic std style
108
108
  {
109
- return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE>, false>(new DATA_TYPE[n]);
109
+ return unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>, false>(new DATA_TYPE[n]);
110
110
  }
111
111
 
112
112
  template<class DATA_TYPE, class... ARGS>
@@ -15,9 +15,9 @@
15
15
  #include "duckdb/planner/expression_iterator.hpp"
16
16
  #include "duckdb/planner/table_filter.hpp"
17
17
  #include "duckdb/common/open_file_info.hpp"
18
+ #include "duckdb/original/std/sstream.hpp"
18
19
 
19
20
  #include <iostream>
20
- #include <sstream>
21
21
 
22
22
  namespace duckdb {
23
23
  struct MultiFilePushdownInfo;
@@ -19,16 +19,15 @@ public:
19
19
  MultiFileColumnMapper(ClientContext &context, MultiFileReader &multi_file_reader, MultiFileReaderData &reader_data,
20
20
  const vector<MultiFileColumnDefinition> &global_columns,
21
21
  const vector<ColumnIndex> &global_column_ids, optional_ptr<TableFilterSet> filters,
22
- MultiFileList &multi_file_list, const MultiFileReaderBindData &bind_data,
23
- const virtual_column_map_t &virtual_columns);
22
+ MultiFileList &multi_file_list, const virtual_column_map_t &virtual_columns);
24
23
 
25
24
  public:
26
- ReaderInitializeType CreateMapping();
25
+ ReaderInitializeType CreateMapping(MultiFileColumnMappingMode mapping_mode);
27
26
 
28
27
  void ThrowColumnNotFoundError(const string &global_column_name) const;
29
28
 
30
29
  private:
31
- ResultColumnMapping CreateColumnMapping();
30
+ ResultColumnMapping CreateColumnMapping(MultiFileColumnMappingMode mapping_mode);
32
31
  ResultColumnMapping CreateColumnMappingByMapper(const ColumnMapper &mapper);
33
32
 
34
33
  unique_ptr<TableFilterSet> CreateFilters(map<idx_t, reference<TableFilter>> &filters, ResultColumnMapping &mapping);
@@ -45,7 +44,6 @@ private:
45
44
  const vector<MultiFileColumnDefinition> &global_columns;
46
45
  const vector<ColumnIndex> &global_column_ids;
47
46
  optional_ptr<TableFilterSet> global_filters;
48
- const MultiFileReaderBindData &bind_data;
49
47
  const virtual_column_map_t &virtual_columns;
50
48
  };
51
49
 
@@ -106,6 +106,13 @@ public:
106
106
  optional_ptr<MultiFileReaderGlobalState> global_state);
107
107
 
108
108
  //! Create all required mappings from the global types/names to the file-local types/names
109
+ DUCKDB_API virtual ReaderInitializeType
110
+ CreateMapping(ClientContext &context, MultiFileReaderData &reader_data,
111
+ const vector<MultiFileColumnDefinition> &global_columns, const vector<ColumnIndex> &global_column_ids,
112
+ optional_ptr<TableFilterSet> filters, MultiFileList &multi_file_list,
113
+ const MultiFileReaderBindData &bind_data, const virtual_column_map_t &virtual_columns,
114
+ MultiFileColumnMappingMode mapping_mode);
115
+
109
116
  DUCKDB_API virtual ReaderInitializeType
110
117
  CreateMapping(ClientContext &context, MultiFileReaderData &reader_data,
111
118
  const vector<MultiFileColumnDefinition> &global_columns, const vector<ColumnIndex> &global_column_ids,
@@ -84,6 +84,9 @@ struct MultiFileBindData : public TableFunctionData {
84
84
  void Initialize(ClientContext &, BaseUnionData &union_data) {
85
85
  Initialize(std::move(union_data.reader));
86
86
  }
87
+ bool SupportStatementCache() const override {
88
+ return false;
89
+ }
87
90
 
88
91
  unique_ptr<FunctionData> Copy() const override;
89
92
  };
@@ -0,0 +1,40 @@
1
+ #pragma once
2
+
3
+ #include "duckdb/original/std/memory.hpp"
4
+ #include "duckdb/original/std/locale.hpp"
5
+ #include "duckdb/original/std/sstream.hpp"
6
+ #include "duckdb/common/unique_ptr.hpp"
7
+ #include "duckdb/common/shared_ptr.hpp"
8
+
9
+ #ifndef DUCKDB_CLANG_TIDY
10
+ namespace std {
11
+ template <class C>
12
+ bool isspace(C c) {
13
+ static_assert(sizeof(C) == 0, "Use StringUtil::CharacterIsSpace instead of isspace!");
14
+ return false;
15
+ }
16
+ #ifndef DUCKDB_ENABLE_DEPRECATED_API
17
+ template <class T, class... ARGS>
18
+ static std::unique_ptr<T> make_unique(ARGS &&...__args) { // NOLINT: mimic std style
19
+ static_assert(sizeof(T) == 0, "Use make_uniq instead of make_unique!");
20
+ return nullptr;
21
+ }
22
+
23
+ template <class T, class... ARGS>
24
+ static std::shared_ptr<T> make_shared(ARGS &&...__args) { // NOLINT: mimic std style
25
+ static_assert(sizeof(T) == 0, "Use make_shared_ptr instead of make_shared!");
26
+ return nullptr;
27
+ }
28
+ #endif // DUCKDB_ENABLE_DEPRECATED_API
29
+
30
+ template <class charT, class traits = char_traits<charT>, class Allocator = allocator<charT>>
31
+ class basic_stringstream_mock;
32
+
33
+ typedef basic_stringstream_mock<char> stringstream;
34
+
35
+ } // namespace std
36
+
37
+ using std::isspace;
38
+ using std::make_shared;
39
+ using std::make_unique;
40
+ #endif
@@ -8,10 +8,33 @@
8
8
 
9
9
  #pragma once
10
10
 
11
- #include <sstream>
11
+ #include "duckdb/original/std/sstream.hpp"
12
12
  #include <string>
13
+ #include <locale>
13
14
 
14
15
  namespace duckdb {
15
16
  using std::string;
16
- using std::stringstream;
17
+ } // namespace duckdb
18
+
19
+ namespace duckdb {
20
+
21
+ template <class charT, class traits = std::char_traits<charT>, class Allocator = std::allocator<charT>>
22
+ class basic_stringstream : public duckdb_base_std::basic_stringstream<charT, traits, Allocator> {
23
+ public:
24
+ using original = duckdb_base_std::basic_stringstream<charT, traits, Allocator>;
25
+
26
+ explicit basic_stringstream(std::ios_base::openmode which = std::ios_base::out | std::ios_base::in)
27
+ : original(which) {
28
+ this->imbue(std::locale::classic());
29
+ }
30
+ explicit basic_stringstream(const std::basic_string<charT, traits, Allocator> &s,
31
+ std::ios_base::openmode which = std::ios_base::out | std::ios_base::in)
32
+ : original(s, which) {
33
+ this->imbue(std::locale::classic());
34
+ }
35
+ basic_stringstream(const basic_stringstream &) = delete;
36
+ basic_stringstream(basic_stringstream &&rhs) noexcept;
37
+ };
38
+
39
+ typedef basic_stringstream<char> stringstream;
17
40
  } // namespace duckdb
@@ -129,46 +129,42 @@ public:
129
129
  static int Sign(hugeint_t n);
130
130
  static hugeint_t Abs(hugeint_t n);
131
131
  // comparison operators
132
- // note that everywhere here we intentionally use bitwise ops
133
- // this is because they seem to be consistently much faster (benchmarked on a Macbook Pro)
134
132
  static bool Equals(hugeint_t lhs, hugeint_t rhs) {
135
- int lower_equals = lhs.lower == rhs.lower;
136
- int upper_equals = lhs.upper == rhs.upper;
137
- return lower_equals & upper_equals;
133
+ bool lower_equals = lhs.lower == rhs.lower;
134
+ bool upper_equals = lhs.upper == rhs.upper;
135
+ return lower_equals && upper_equals;
138
136
  }
139
137
 
140
138
  static bool NotEquals(hugeint_t lhs, hugeint_t rhs) {
141
- int lower_not_equals = lhs.lower != rhs.lower;
142
- int upper_not_equals = lhs.upper != rhs.upper;
143
- return lower_not_equals | upper_not_equals;
139
+ return !Equals(lhs, rhs);
144
140
  }
145
141
 
146
142
  static bool GreaterThan(hugeint_t lhs, hugeint_t rhs) {
147
- int upper_bigger = lhs.upper > rhs.upper;
148
- int upper_equal = lhs.upper == rhs.upper;
149
- int lower_bigger = lhs.lower > rhs.lower;
150
- return upper_bigger | (upper_equal & lower_bigger);
143
+ bool upper_bigger = lhs.upper > rhs.upper;
144
+ bool upper_equal = lhs.upper == rhs.upper;
145
+ bool lower_bigger = lhs.lower > rhs.lower;
146
+ return upper_bigger || (upper_equal && lower_bigger);
151
147
  }
152
148
 
153
149
  static bool GreaterThanEquals(hugeint_t lhs, hugeint_t rhs) {
154
- int upper_bigger = lhs.upper > rhs.upper;
155
- int upper_equal = lhs.upper == rhs.upper;
156
- int lower_bigger_equals = lhs.lower >= rhs.lower;
157
- return upper_bigger | (upper_equal & lower_bigger_equals);
150
+ bool upper_bigger = lhs.upper > rhs.upper;
151
+ bool upper_equal = lhs.upper == rhs.upper;
152
+ bool lower_bigger_equals = lhs.lower >= rhs.lower;
153
+ return upper_bigger || (upper_equal && lower_bigger_equals);
158
154
  }
159
155
 
160
156
  static bool LessThan(hugeint_t lhs, hugeint_t rhs) {
161
- int upper_smaller = lhs.upper < rhs.upper;
162
- int upper_equal = lhs.upper == rhs.upper;
163
- int lower_smaller = lhs.lower < rhs.lower;
164
- return upper_smaller | (upper_equal & lower_smaller);
157
+ bool upper_smaller = lhs.upper < rhs.upper;
158
+ bool upper_equal = lhs.upper == rhs.upper;
159
+ bool lower_smaller = lhs.lower < rhs.lower;
160
+ return upper_smaller || (upper_equal && lower_smaller);
165
161
  }
166
162
 
167
163
  static bool LessThanEquals(hugeint_t lhs, hugeint_t rhs) {
168
- int upper_smaller = lhs.upper < rhs.upper;
169
- int upper_equal = lhs.upper == rhs.upper;
170
- int lower_smaller_equals = lhs.lower <= rhs.lower;
171
- return upper_smaller | (upper_equal & lower_smaller_equals);
164
+ bool upper_smaller = lhs.upper < rhs.upper;
165
+ bool upper_equal = lhs.upper == rhs.upper;
166
+ bool lower_smaller_equals = lhs.lower <= rhs.lower;
167
+ return upper_smaller || (upper_equal && lower_smaller_equals);
172
168
  }
173
169
 
174
170
  static constexpr uint8_t CACHED_POWERS_OF_TEN = 39;
@@ -118,46 +118,42 @@ public:
118
118
  static hugeint_t Abs(hugeint_t n);
119
119
 
120
120
  // comparison operators
121
- // note that everywhere here we intentionally use bitwise ops
122
- // this is because they seem to be consistently much faster (benchmarked on a Macbook Pro)
123
121
  static bool Equals(uhugeint_t lhs, uhugeint_t rhs) {
124
- int lower_equals = lhs.lower == rhs.lower;
125
- int upper_equals = lhs.upper == rhs.upper;
126
- return lower_equals & upper_equals;
122
+ bool lower_equals = lhs.lower == rhs.lower;
123
+ bool upper_equals = lhs.upper == rhs.upper;
124
+ return lower_equals && upper_equals;
127
125
  }
128
126
 
129
127
  static bool NotEquals(uhugeint_t lhs, uhugeint_t rhs) {
130
- int lower_not_equals = lhs.lower != rhs.lower;
131
- int upper_not_equals = lhs.upper != rhs.upper;
132
- return lower_not_equals | upper_not_equals;
128
+ return !Equals(lhs, rhs);
133
129
  }
134
130
 
135
131
  static bool GreaterThan(uhugeint_t lhs, uhugeint_t rhs) {
136
- int upper_bigger = lhs.upper > rhs.upper;
137
- int upper_equal = lhs.upper == rhs.upper;
138
- int lower_bigger = lhs.lower > rhs.lower;
139
- return upper_bigger | (upper_equal & lower_bigger);
132
+ bool upper_bigger = lhs.upper > rhs.upper;
133
+ bool upper_equal = lhs.upper == rhs.upper;
134
+ bool lower_bigger = lhs.lower > rhs.lower;
135
+ return upper_bigger || (upper_equal && lower_bigger);
140
136
  }
141
137
 
142
138
  static bool GreaterThanEquals(uhugeint_t lhs, uhugeint_t rhs) {
143
- int upper_bigger = lhs.upper > rhs.upper;
144
- int upper_equal = lhs.upper == rhs.upper;
145
- int lower_bigger_equals = lhs.lower >= rhs.lower;
146
- return upper_bigger | (upper_equal & lower_bigger_equals);
139
+ bool upper_bigger = lhs.upper > rhs.upper;
140
+ bool upper_equal = lhs.upper == rhs.upper;
141
+ bool lower_bigger_equals = lhs.lower >= rhs.lower;
142
+ return upper_bigger || (upper_equal && lower_bigger_equals);
147
143
  }
148
144
 
149
145
  static bool LessThan(uhugeint_t lhs, uhugeint_t rhs) {
150
- int upper_smaller = lhs.upper < rhs.upper;
151
- int upper_equal = lhs.upper == rhs.upper;
152
- int lower_smaller = lhs.lower < rhs.lower;
153
- return upper_smaller | (upper_equal & lower_smaller);
146
+ bool upper_smaller = lhs.upper < rhs.upper;
147
+ bool upper_equal = lhs.upper == rhs.upper;
148
+ bool lower_smaller = lhs.lower < rhs.lower;
149
+ return upper_smaller || (upper_equal && lower_smaller);
154
150
  }
155
151
 
156
152
  static bool LessThanEquals(uhugeint_t lhs, uhugeint_t rhs) {
157
- int upper_smaller = lhs.upper < rhs.upper;
158
- int upper_equal = lhs.upper == rhs.upper;
159
- int lower_smaller_equals = lhs.lower <= rhs.lower;
160
- return upper_smaller | (upper_equal & lower_smaller_equals);
153
+ bool upper_smaller = lhs.upper < rhs.upper;
154
+ bool upper_equal = lhs.upper == rhs.upper;
155
+ bool lower_smaller_equals = lhs.lower <= rhs.lower;
156
+ return upper_smaller || (upper_equal && lower_smaller_equals);
161
157
  }
162
158
 
163
159
  static constexpr uint8_t CACHED_POWERS_OF_TEN = 39;
@@ -349,6 +349,9 @@ struct LogicalType {
349
349
 
350
350
  DUCKDB_API void Verify() const;
351
351
 
352
+ DUCKDB_API bool IsSigned() const;
353
+ DUCKDB_API bool IsUnsigned() const;
354
+
352
355
  DUCKDB_API bool IsValid() const;
353
356
  DUCKDB_API bool IsComplete() const;
354
357
 
@@ -3,16 +3,16 @@
3
3
  #include "duckdb/common/exception.hpp"
4
4
  #include "duckdb/common/likely.hpp"
5
5
  #include "duckdb/common/memory_safety.hpp"
6
+ #include "duckdb/original/std/memory.hpp"
6
7
 
7
- #include <memory>
8
8
  #include <type_traits>
9
9
 
10
10
  namespace duckdb {
11
11
 
12
12
  template <class DATA_TYPE, class DELETER = std::default_delete<DATA_TYPE>, bool SAFE = true>
13
- class unique_ptr : public std::unique_ptr<DATA_TYPE, DELETER> { // NOLINT: naming
13
+ class unique_ptr : public duckdb_base_std::unique_ptr<DATA_TYPE, DELETER> { // NOLINT: naming
14
14
  public:
15
- using original = std::unique_ptr<DATA_TYPE, DELETER>;
15
+ using original = duckdb_base_std::unique_ptr<DATA_TYPE, DELETER>;
16
16
  using original::original; // NOLINT
17
17
  using pointer = typename original::pointer;
18
18
 
@@ -54,11 +54,37 @@ public:
54
54
  }
55
55
  };
56
56
 
57
- // FIXME: DELETER is defined, but we use std::default_delete???
57
+ template <class DATA_TYPE, class DELETER>
58
+ class unique_ptr<DATA_TYPE[], DELETER, true> : public duckdb_base_std::unique_ptr<DATA_TYPE[], DELETER> {
59
+ public:
60
+ using original = duckdb_base_std::unique_ptr<DATA_TYPE[], DELETER>;
61
+ using original::original;
62
+
63
+ private:
64
+ static inline void AssertNotNull(const bool null) {
65
+ #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
66
+ return;
67
+ #else
68
+ if (DUCKDB_UNLIKELY(null)) {
69
+ throw duckdb::InternalException("Attempted to dereference unique_ptr that is NULL!");
70
+ }
71
+ #endif
72
+ }
73
+
74
+ public:
75
+ typename std::add_lvalue_reference<DATA_TYPE>::type operator[](size_t __i) const { // NOLINT: hiding on purpose
76
+ const auto ptr = original::get();
77
+ if (MemorySafety<true>::ENABLED) {
78
+ AssertNotNull(!ptr);
79
+ }
80
+ return ptr[__i];
81
+ }
82
+ };
83
+
58
84
  template <class DATA_TYPE, class DELETER, bool SAFE>
59
- class unique_ptr<DATA_TYPE[], DELETER, SAFE> : public std::unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>> {
85
+ class unique_ptr<DATA_TYPE[], DELETER, SAFE> : public duckdb_base_std::unique_ptr<DATA_TYPE[], DELETER> {
60
86
  public:
61
- using original = std::unique_ptr<DATA_TYPE[], std::default_delete<DATA_TYPE[]>>;
87
+ using original = duckdb_base_std::unique_ptr<DATA_TYPE[], DELETER>;
62
88
  using original::original;
63
89
 
64
90
  private:
@@ -83,10 +109,10 @@ public:
83
109
  };
84
110
 
85
111
  template <typename T>
86
- using unique_array = unique_ptr<T[], std::default_delete<T>, true>;
112
+ using unique_array = unique_ptr<T[], std::default_delete<T[]>, true>;
87
113
 
88
114
  template <typename T>
89
- using unsafe_unique_array = unique_ptr<T[], std::default_delete<T>, false>;
115
+ using unsafe_unique_array = unique_ptr<T[], std::default_delete<T[]>, false>;
90
116
 
91
117
  template <typename T>
92
118
  using unsafe_unique_ptr = unique_ptr<T, std::default_delete<T>, false>;
@@ -26,6 +26,7 @@ public:
26
26
 
27
27
  protected:
28
28
  vector<ColumnBinding> bindings;
29
+ vector<LogicalType> types;
29
30
  bool verify_only;
30
31
 
31
32
  unique_ptr<Expression> VisitReplace(BoundColumnRefExpression &expr, unique_ptr<Expression> *expr_ptr) override;
@@ -168,8 +168,8 @@ public:
168
168
  TupleDataChunkState chunk_state;
169
169
  };
170
170
 
171
- JoinHashTable(ClientContext &context, const vector<JoinCondition> &conditions, vector<LogicalType> build_types,
172
- JoinType type, const vector<idx_t> &output_columns);
171
+ JoinHashTable(ClientContext &context, const PhysicalOperator &op, const vector<JoinCondition> &conditions,
172
+ vector<LogicalType> build_types, JoinType type, const vector<idx_t> &output_columns);
173
173
  ~JoinHashTable();
174
174
 
175
175
  //! Add the given data to the HT
@@ -214,6 +214,7 @@ public:
214
214
  }
215
215
 
216
216
  ClientContext &context;
217
+ const PhysicalOperator &op;
217
218
  //! BufferManager
218
219
  BufferManager &buffer_manager;
219
220
  //! The join conditions
@@ -27,6 +27,8 @@ struct ColumnCount {
27
27
  bool is_comment = false;
28
28
  //! If this row is potentially a mid-line comment
29
29
  bool is_mid_comment = false;
30
+ //! Number of empty lines before this value
31
+ idx_t empty_lines = 0;
30
32
  };
31
33
 
32
34
  class ColumnCountResult : public ScannerResult {
@@ -48,6 +50,7 @@ public:
48
50
  map<idx_t, idx_t> rows_per_column_count;
49
51
  CSVErrorHandler &error_handler;
50
52
  map<idx_t, shared_ptr<CSVBufferHandle>> buffer_handles;
53
+ idx_t empty_lines = 0;
51
54
  //! Adds a Value to the result
52
55
  static inline void AddValue(ColumnCountResult &result, idx_t buffer_pos);
53
56
  //! Adds a Row to the result
@@ -40,7 +40,7 @@ struct CSVEncoderBuffer {
40
40
 
41
41
  private:
42
42
  //! The encoded buffer, we only have one per file, so we cache it and make sure to pass over unused bytes.
43
- std::unique_ptr<char[]> encoded_buffer;
43
+ duckdb::unique_ptr<char[]> encoded_buffer;
44
44
  //! The encoded buffer size is defined as buffer_size/GetRatio()
45
45
  idx_t encoded_buffer_size;
46
46
  };
@@ -30,6 +30,18 @@ struct QuoteEscapeCombination {
30
30
  char escape;
31
31
  };
32
32
 
33
+ //! Current stats of candidate analysis
34
+ struct CandidateStats {
35
+ //! Number of rows read
36
+ idx_t rows_read = 0;
37
+ //! Best Number of consistent rows (i.e., presenting all columns)
38
+ idx_t best_consistent_rows = 0;
39
+ //! If padding was necessary (i.e., rows are missing some columns, how many)
40
+ idx_t prev_padding_count = 0;
41
+ //! Min number of ignored rows
42
+ idx_t min_ignored_rows = 0;
43
+ };
44
+
33
45
  //! All the options that will be used to sniff the dialect of the CSV file
34
46
  struct DialectCandidates {
35
47
  //! The constructor populates all of our the options that will be used in our sniffer search space
@@ -131,9 +143,9 @@ private:
131
143
  void GenerateStateMachineSearchSpace(vector<unique_ptr<ColumnCountScanner>> &column_count_scanners,
132
144
  const DialectCandidates &dialect_candidates);
133
145
 
134
- //! 2. Analyzes if dialect candidate is a good candidate to be considered, if so, it adds it to the candidates
135
- void AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner>, idx_t &rows_read, idx_t &best_consistent_rows,
136
- idx_t &prev_padding_count, idx_t &min_ignored_rows);
146
+ //! 2. Analyzes if a dialect candidate is a good candidate to be considered, if so, it adds it to the candidates
147
+ void AnalyzeDialectCandidate(unique_ptr<ColumnCountScanner>, CandidateStats &stats,
148
+ vector<unique_ptr<ColumnCountScanner>> &successful_candidates);
137
149
  //! 3. Refine Candidates over remaining chunks
138
150
  void RefineCandidates();
139
151
 
@@ -197,9 +197,9 @@ struct VectorCastHelpers {
197
197
 
198
198
  if (STRUCT_KEY) {
199
199
  needs_quotes = true;
200
- } else if (isspace(string_data[0])) {
200
+ } else if (StringUtil::CharacterIsSpace(string_data[0])) {
201
201
  needs_quotes = true;
202
- } else if (base_length >= 2 && isspace(string_data[base_length - 1])) {
202
+ } else if (base_length >= 2 && StringUtil::CharacterIsSpace(string_data[base_length - 1])) {
203
203
  needs_quotes = true;
204
204
  } else if (StringUtil::CIEquals(string_data, base_length, "null", 4)) {
205
205
  needs_quotes = true;
@@ -20,6 +20,7 @@ struct CopyFunctionFileStatistics;
20
20
  class Binder;
21
21
  class ColumnDataCollection;
22
22
  class ExecutionContext;
23
+ class PhysicalOperatorLogger;
23
24
 
24
25
  struct LocalFunctionData {
25
26
  virtual ~LocalFunctionData() = default;
@@ -123,6 +124,8 @@ typedef void (*copy_to_get_written_statistics_t)(ClientContext &context, Functio
123
124
 
124
125
  typedef vector<unique_ptr<Expression>> (*copy_to_select_t)(CopyToSelectInput &input);
125
126
 
127
+ typedef void (*copy_to_initialize_operator_t)(GlobalFunctionData &gstate, const PhysicalOperator &op);
128
+
126
129
  enum class CopyFunctionReturnType : uint8_t {
127
130
  CHANGED_ROWS = 0,
128
131
  CHANGED_ROWS_AND_FILE_LIST = 1,
@@ -145,9 +148,9 @@ public:
145
148
  : Function(name), plan(nullptr), copy_to_select(nullptr), copy_to_bind(nullptr),
146
149
  copy_to_initialize_local(nullptr), copy_to_initialize_global(nullptr),
147
150
  copy_to_get_written_statistics(nullptr), copy_to_sink(nullptr), copy_to_combine(nullptr),
148
- copy_to_finalize(nullptr), execution_mode(nullptr), prepare_batch(nullptr), flush_batch(nullptr),
149
- desired_batch_size(nullptr), rotate_files(nullptr), rotate_next_file(nullptr), serialize(nullptr),
150
- deserialize(nullptr), copy_from_bind(nullptr) {
151
+ copy_to_finalize(nullptr), execution_mode(nullptr), initialize_operator(nullptr), prepare_batch(nullptr),
152
+ flush_batch(nullptr), desired_batch_size(nullptr), rotate_files(nullptr), rotate_next_file(nullptr),
153
+ serialize(nullptr), deserialize(nullptr), copy_from_bind(nullptr) {
151
154
  }
152
155
 
153
156
  //! Plan rewrite copy function
@@ -162,6 +165,7 @@ public:
162
165
  copy_to_combine_t copy_to_combine;
163
166
  copy_to_finalize_t copy_to_finalize;
164
167
  copy_to_execution_mode_t execution_mode;
168
+ copy_to_initialize_operator_t initialize_operator;
165
169
 
166
170
  copy_prepare_batch_t prepare_batch;
167
171
  copy_flush_batch_t flush_batch;
@@ -61,6 +61,7 @@ struct FunctionData {
61
61
  DUCKDB_API virtual unique_ptr<FunctionData> Copy() const = 0;
62
62
  DUCKDB_API virtual bool Equals(const FunctionData &other) const = 0;
63
63
  DUCKDB_API static bool Equals(const FunctionData *left, const FunctionData *right);
64
+ DUCKDB_API virtual bool SupportStatementCache() const;
64
65
 
65
66
  template <class TARGET>
66
67
  TARGET &Cast() {
@@ -73,9 +73,10 @@ public:
73
73
  const vector<unique_ptr<Expression>> &groups);
74
74
  DUCKDB_API static void BindSortedAggregate(ClientContext &context, BoundWindowExpression &expr);
75
75
 
76
- private:
77
76
  //! Cast a set of expressions to the arguments of this function
78
77
  void CastToFunctionArguments(SimpleFunction &function, vector<unique_ptr<Expression>> &children);
78
+
79
+ private:
79
80
  optional_idx BindVarArgsFunctionCost(const SimpleFunction &func, const vector<LogicalType> &arguments);
80
81
  optional_idx BindFunctionCost(const SimpleFunction &func, const vector<LogicalType> &arguments);
81
82