duckdb 0.7.2-dev2995.0 → 0.7.2-dev3117.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/binding.gyp +1 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/json/include/json_deserializer.hpp +1 -0
  4. package/src/duckdb/extension/json/include/json_serializer.hpp +8 -1
  5. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +1 -3
  6. package/src/duckdb/extension/json/json_functions/json_structure.cpp +3 -3
  7. package/src/duckdb/extension/json/json_functions/json_transform.cpp +3 -2
  8. package/src/duckdb/extension/parquet/parquet-extension.cpp +9 -7
  9. package/src/duckdb/src/common/enum_util.cpp +5908 -0
  10. package/src/duckdb/src/common/enums/expression_type.cpp +216 -4
  11. package/src/duckdb/src/common/enums/join_type.cpp +6 -5
  12. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  13. package/src/duckdb/src/common/exception.cpp +1 -1
  14. package/src/duckdb/src/common/exception_format_value.cpp +2 -2
  15. package/src/duckdb/src/common/multi_file_reader.cpp +14 -0
  16. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +143 -0
  17. package/src/duckdb/src/common/serializer/binary_serializer.cpp +160 -0
  18. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +3 -3
  19. package/src/duckdb/src/common/types.cpp +11 -10
  20. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +4 -4
  21. package/src/duckdb/src/core_functions/scalar/date/date_part.cpp +2 -1
  22. package/src/duckdb/src/core_functions/scalar/list/list_sort.cpp +2 -3
  23. package/src/duckdb/src/execution/aggregate_hashtable.cpp +3 -3
  24. package/src/duckdb/src/execution/operator/aggregate/distinct_aggregate_data.cpp +1 -1
  25. package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +2 -2
  26. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +3 -2
  27. package/src/duckdb/src/execution/operator/helper/physical_streaming_sample.cpp +2 -1
  28. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +2 -1
  29. package/src/duckdb/src/execution/operator/join/physical_comparison_join.cpp +2 -1
  30. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +165 -0
  31. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +1 -1
  32. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +9 -7
  33. package/src/duckdb/src/execution/partitionable_hashtable.cpp +2 -2
  34. package/src/duckdb/src/execution/physical_plan/plan_copy_to_file.cpp +25 -4
  35. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +2 -1
  36. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +1 -1
  37. package/src/duckdb/src/function/scalar/operators/arithmetic.cpp +5 -4
  38. package/src/duckdb/src/function/table/copy_csv.cpp +85 -29
  39. package/src/duckdb/src/function/table/read_csv.cpp +17 -11
  40. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +2 -1
  41. package/src/duckdb/src/function/table/system/duckdb_types.cpp +2 -1
  42. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  43. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +958 -0
  44. package/src/duckdb/src/include/duckdb/common/enums/join_type.hpp +3 -3
  45. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  46. package/src/duckdb/src/include/duckdb/common/exception.hpp +4 -4
  47. package/src/duckdb/src/include/duckdb/common/exception_format_value.hpp +3 -2
  48. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +44 -0
  49. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +93 -0
  50. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +92 -0
  51. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +7 -3
  52. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +2 -2
  53. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +1 -1
  54. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +1 -1
  55. package/src/duckdb/src/include/duckdb/common/types.hpp +1 -0
  56. package/src/duckdb/src/include/duckdb/common/vector.hpp +61 -14
  57. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +3 -2
  58. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/distinct_aggregate_data.hpp +2 -2
  59. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/grouped_aggregate_data.hpp +2 -2
  60. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +3 -3
  61. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +68 -0
  62. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +2 -0
  63. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +3 -3
  64. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -2
  65. package/src/duckdb/src/include/duckdb/function/copy_function.hpp +32 -4
  66. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +4 -2
  67. package/src/duckdb/src/include/duckdb/main/config.hpp +2 -0
  68. package/src/duckdb/src/include/duckdb/main/database.hpp +1 -3
  69. package/src/duckdb/src/include/duckdb/main/database_path_and_type.hpp +24 -0
  70. package/src/duckdb/src/include/duckdb/main/relation/setop_relation.hpp +1 -0
  71. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +1 -0
  72. package/src/duckdb/src/include/duckdb/planner/expression_binder/base_select_binder.hpp +2 -0
  73. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +2 -0
  74. package/src/duckdb/src/include/duckdb/planner/operator/logical_aggregate.hpp +1 -1
  75. package/src/duckdb/src/include/duckdb/planner/query_node/bound_select_node.hpp +1 -1
  76. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +26 -0
  77. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  78. package/src/duckdb/src/main/client_context.cpp +1 -0
  79. package/src/duckdb/src/main/client_verify.cpp +1 -0
  80. package/src/duckdb/src/main/database.cpp +11 -23
  81. package/src/duckdb/src/main/database_path_and_type.cpp +23 -0
  82. package/src/duckdb/src/main/relation/join_relation.cpp +2 -1
  83. package/src/duckdb/src/main/relation/setop_relation.cpp +2 -3
  84. package/src/duckdb/src/parser/expression/window_expression.cpp +1 -1
  85. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +2 -2
  86. package/src/duckdb/src/parser/query_node/select_node.cpp +1 -1
  87. package/src/duckdb/src/parser/result_modifier.cpp +2 -2
  88. package/src/duckdb/src/parser/statement/select_statement.cpp +0 -44
  89. package/src/duckdb/src/parser/tableref/joinref.cpp +3 -3
  90. package/src/duckdb/src/parser/tableref.cpp +1 -1
  91. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +3 -3
  92. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +6 -0
  93. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +4 -1
  94. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +7 -0
  95. package/src/duckdb/src/planner/operator/logical_aggregate.cpp +1 -1
  96. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +2 -2
  97. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +2 -1
  98. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +20 -0
  99. package/src/duckdb/src/verification/statement_verifier.cpp +3 -0
  100. package/src/duckdb/ub_src_common.cpp +2 -2
  101. package/src/duckdb/ub_src_common_serializer.cpp +4 -2
  102. package/src/duckdb/ub_src_execution_operator_persistent.cpp +2 -0
  103. package/src/duckdb/ub_src_main.cpp +2 -0
  104. package/src/duckdb/src/common/serializer/enum_serializer.cpp +0 -1180
  105. package/src/duckdb/src/common/vector.cpp +0 -12
  106. package/src/duckdb/src/include/duckdb/common/serializer/enum_serializer.hpp +0 -113
@@ -29,13 +29,13 @@ enum class JoinType : uint8_t {
29
29
  // (and NULL if no partner is found)
30
30
  };
31
31
 
32
- //! Convert join type to string
33
- DUCKDB_API string JoinTypeToString(JoinType type);
34
-
35
32
  //! True if join is left or full outer join
36
33
  bool IsLeftOuterJoin(JoinType type);
37
34
 
38
35
  //! True if join is right or full outer join
39
36
  bool IsRightOuterJoin(JoinType type);
40
37
 
38
+ // **DEPRECATED**: Use EnumUtil directly instead.
39
+ string JoinTypeToString(JoinType type);
40
+
41
41
  } // namespace duckdb
@@ -30,6 +30,7 @@ enum class PhysicalOperatorType : uint8_t {
30
30
  FILTER,
31
31
  PROJECTION,
32
32
  COPY_TO_FILE,
33
+ BATCH_COPY_TO_FILE,
33
34
  RESERVOIR_SAMPLE,
34
35
  STREAMING_SAMPLE,
35
36
  STREAMING_WINDOW,
@@ -10,11 +10,11 @@
10
10
 
11
11
  #include "duckdb/common/assert.hpp"
12
12
  #include "duckdb/common/exception_format_value.hpp"
13
- #include "duckdb/common/vector.hpp"
14
13
  #include "duckdb/common/shared_ptr.hpp"
15
14
  #include "duckdb/common/map.hpp"
16
15
  #include "duckdb/common/typedefs.hpp"
17
16
 
17
+ #include <vector>
18
18
  #include <stdexcept>
19
19
 
20
20
  namespace duckdb {
@@ -105,14 +105,14 @@ public:
105
105
 
106
106
  template <typename... Args>
107
107
  static string ConstructMessage(const string &msg, Args... params) {
108
- vector<ExceptionFormatValue> values;
108
+ std::vector<ExceptionFormatValue> values;
109
109
  return ConstructMessageRecursive(msg, values, params...);
110
110
  }
111
111
 
112
- DUCKDB_API static string ConstructMessageRecursive(const string &msg, vector<ExceptionFormatValue> &values);
112
+ DUCKDB_API static string ConstructMessageRecursive(const string &msg, std::vector<ExceptionFormatValue> &values);
113
113
 
114
114
  template <class T, typename... Args>
115
- static string ConstructMessageRecursive(const string &msg, vector<ExceptionFormatValue> &values, T param,
115
+ static string ConstructMessageRecursive(const string &msg, std::vector<ExceptionFormatValue> &values, T param,
116
116
  Args... params) {
117
117
  values.push_back(ExceptionFormatValue::CreateFormatValue<T>(param));
118
118
  return ConstructMessageRecursive(msg, values, params...);
@@ -10,7 +10,8 @@
10
10
 
11
11
  #include "duckdb/common/string.hpp"
12
12
  #include "duckdb/common/hugeint.hpp"
13
- #include "duckdb/common/vector.hpp"
13
+
14
+ #include <vector>
14
15
 
15
16
  namespace duckdb {
16
17
 
@@ -40,7 +41,7 @@ public:
40
41
  static ExceptionFormatValue CreateFormatValue(T value) {
41
42
  return int64_t(value);
42
43
  }
43
- static string Format(const string &msg, vector<ExceptionFormatValue> &values);
44
+ static string Format(const string &msg, std::vector<ExceptionFormatValue> &values);
44
45
  };
45
46
 
46
47
  template <>
@@ -9,6 +9,8 @@
9
9
  #pragma once
10
10
 
11
11
  #include "duckdb/common/types.hpp"
12
+ #include "duckdb/common/hive_partitioning.hpp"
13
+ #include "re2/re2.h"
12
14
 
13
15
  namespace duckdb {
14
16
  class Serializer;
@@ -18,11 +20,53 @@ struct BindInfo;
18
20
  struct MultiFileReaderOptions {
19
21
  bool filename = false;
20
22
  bool hive_partitioning = false;
23
+ bool auto_detect_hive_partitioning = true;
21
24
  bool union_by_name = false;
22
25
 
23
26
  DUCKDB_API void Serialize(Serializer &serializer) const;
24
27
  DUCKDB_API static MultiFileReaderOptions Deserialize(Deserializer &source);
25
28
  DUCKDB_API void AddBatchInfo(BindInfo &bind_info) const;
29
+
30
+ static bool AutoDetectHivePartitioning(const vector<string> &files) {
31
+ if (files.empty()) {
32
+ return false;
33
+ }
34
+
35
+ std::unordered_set<string> uset;
36
+ idx_t splits_size;
37
+ {
38
+ // front file
39
+ auto splits = StringUtil::Split(files.front(), FileSystem::PathSeparator());
40
+ splits_size = splits.size();
41
+ if (splits.size() < 2) {
42
+ return false;
43
+ }
44
+ for (auto it = splits.begin(); it != std::prev(splits.end()); it++) {
45
+ auto part = StringUtil::Split(*it, "=");
46
+ if (part.size() == 2) {
47
+ uset.insert(part.front());
48
+ }
49
+ }
50
+ }
51
+ if (uset.empty()) {
52
+ return false;
53
+ }
54
+ for (auto &file : files) {
55
+ auto splits = StringUtil::Split(file, FileSystem::PathSeparator());
56
+ if (splits.size() != splits_size) {
57
+ return false;
58
+ }
59
+ for (auto it = splits.begin(); it != std::prev(splits.end()); it++) {
60
+ auto part = StringUtil::Split(*it, "=");
61
+ if (part.size() == 2) {
62
+ if (uset.find(part.front()) == uset.end()) {
63
+ return false;
64
+ }
65
+ }
66
+ }
67
+ }
68
+ return true;
69
+ }
26
70
  };
27
71
 
28
72
  } // namespace duckdb
@@ -0,0 +1,93 @@
1
+ #pragma once
2
+ #include "duckdb/common/serializer/format_deserializer.hpp"
3
+
4
+ namespace duckdb {
5
+
6
+ class BinaryDeserializer : public FormatDeserializer {
7
+ public:
8
+ template <class T>
9
+ static unique_ptr<T> Deserialize(data_ptr_t ptr, idx_t length) {
10
+ BinaryDeserializer deserializer(ptr, length);
11
+ deserializer.OnObjectBegin();
12
+ auto result = T::FormatDeserialize(deserializer);
13
+ deserializer.OnObjectEnd();
14
+ return result;
15
+ }
16
+
17
+ private:
18
+ explicit BinaryDeserializer(data_ptr_t ptr, idx_t length) : ptr(ptr), end_ptr(ptr + length) {
19
+ deserialize_enum_from_string = false;
20
+ }
21
+ struct State {
22
+ uint32_t expected_field_count;
23
+ idx_t expected_size;
24
+ uint32_t read_field_count;
25
+ State(uint32_t expected_field_count, idx_t expected_size)
26
+ : expected_field_count(expected_field_count), expected_size(expected_size), read_field_count(0) {
27
+ }
28
+ };
29
+
30
+ const char *current_tag = nullptr;
31
+ data_ptr_t ptr;
32
+ data_ptr_t end_ptr;
33
+ vector<State> stack;
34
+
35
+ template <class T>
36
+ T ReadPrimitive() {
37
+ T value;
38
+ ReadData((data_ptr_t)&value, sizeof(T));
39
+ return value;
40
+ }
41
+
42
+ void ReadData(data_ptr_t buffer, idx_t read_size) {
43
+ if (ptr + read_size > end_ptr) {
44
+ throw SerializationException("Failed to deserialize: not enough data in buffer to fulfill read request");
45
+ }
46
+ memcpy(buffer, ptr, read_size);
47
+ ptr += read_size;
48
+ }
49
+
50
+ // Set the 'tag' of the property to read
51
+ void SetTag(const char *tag) final;
52
+
53
+ //===--------------------------------------------------------------------===//
54
+ // Nested Types Hooks
55
+ //===--------------------------------------------------------------------===//
56
+ void OnObjectBegin() final;
57
+ void OnObjectEnd() final;
58
+ idx_t OnListBegin() final;
59
+ void OnListEnd() final;
60
+ idx_t OnMapBegin() final;
61
+ void OnMapEnd() final;
62
+ void OnMapEntryBegin() final;
63
+ void OnMapEntryEnd() final;
64
+ void OnMapKeyBegin() final;
65
+ void OnMapValueBegin() final;
66
+ bool OnOptionalBegin() final;
67
+
68
+ void OnPairBegin() final;
69
+ void OnPairKeyBegin() final;
70
+ void OnPairValueBegin() final;
71
+ void OnPairEnd() final;
72
+
73
+ //===--------------------------------------------------------------------===//
74
+ // Primitive Types
75
+ //===--------------------------------------------------------------------===//
76
+ bool ReadBool() final;
77
+ int8_t ReadSignedInt8() final;
78
+ uint8_t ReadUnsignedInt8() final;
79
+ int16_t ReadSignedInt16() final;
80
+ uint16_t ReadUnsignedInt16() final;
81
+ int32_t ReadSignedInt32() final;
82
+ uint32_t ReadUnsignedInt32() final;
83
+ int64_t ReadSignedInt64() final;
84
+ uint64_t ReadUnsignedInt64() final;
85
+ float ReadFloat() final;
86
+ double ReadDouble() final;
87
+ string ReadString() final;
88
+ interval_t ReadInterval() final;
89
+ hugeint_t ReadHugeInt() final;
90
+ void ReadDataPtr(data_ptr_t &ptr, idx_t count) final;
91
+ };
92
+
93
+ } // namespace duckdb
@@ -0,0 +1,92 @@
1
+ #pragma once
2
+
3
+ #include "duckdb/common/serializer/format_serializer.hpp"
4
+
5
+ namespace duckdb {
6
+
7
+ struct BinarySerializer : public FormatSerializer {
8
+
9
+ private:
10
+ struct State {
11
+ // how many fields are present in the object
12
+ uint32_t field_count;
13
+ // the size of the object
14
+ uint64_t size;
15
+ // the offset of the object start in the buffer
16
+ uint64_t offset;
17
+ };
18
+
19
+ const char *current_tag;
20
+
21
+ vector<data_t> data;
22
+ vector<State> stack;
23
+
24
+ template <class T>
25
+ void Write(T element) {
26
+ static_assert(std::is_trivially_destructible<T>(), "Write element must be trivially destructible");
27
+ WriteData((const_data_ptr_t)&element, sizeof(T));
28
+ }
29
+ void WriteData(const_data_ptr_t buffer, idx_t write_size) {
30
+ data.insert(data.end(), buffer, buffer + write_size);
31
+ stack.back().size += write_size;
32
+ }
33
+
34
+ explicit BinarySerializer() {
35
+ serialize_enum_as_string = false;
36
+ }
37
+
38
+ public:
39
+ template <class T>
40
+ static vector<data_t> Serialize(T &obj) {
41
+ BinarySerializer serializer;
42
+ serializer.OnObjectBegin();
43
+ obj.FormatSerialize(serializer);
44
+ serializer.OnObjectEnd();
45
+ return std::move(serializer.data);
46
+ }
47
+
48
+ void SetTag(const char *tag) final;
49
+
50
+ //===--------------------------------------------------------------------===//
51
+ // Nested Types Hooks
52
+ //===--------------------------------------------------------------------===//
53
+ void OnOptionalBegin(bool present) final;
54
+ void OnListBegin(idx_t count) final;
55
+ void OnListEnd(idx_t count) final;
56
+ void OnMapBegin(idx_t count) final;
57
+ void OnMapEntryBegin() final;
58
+ void OnMapEntryEnd() final;
59
+ void OnMapKeyBegin() final;
60
+ void OnMapValueBegin() final;
61
+ void OnMapEnd(idx_t count) final;
62
+ void OnObjectBegin() final;
63
+ void OnObjectEnd() final;
64
+ void OnPairBegin() final;
65
+ void OnPairKeyBegin() final;
66
+ void OnPairValueBegin() final;
67
+ void OnPairEnd() final;
68
+
69
+ //===--------------------------------------------------------------------===//
70
+ // Primitive Types
71
+ //===--------------------------------------------------------------------===//
72
+ void WriteNull() final;
73
+ void WriteValue(uint8_t value) final;
74
+ void WriteValue(int8_t value) final;
75
+ void WriteValue(uint16_t value) final;
76
+ void WriteValue(int16_t value) final;
77
+ void WriteValue(uint32_t value) final;
78
+ void WriteValue(int32_t value) final;
79
+ void WriteValue(uint64_t value) final;
80
+ void WriteValue(int64_t value) final;
81
+ void WriteValue(hugeint_t value) final;
82
+ void WriteValue(float value) final;
83
+ void WriteValue(double value) final;
84
+ void WriteValue(interval_t value) final;
85
+ void WriteValue(const string_t value) final;
86
+ void WriteValue(const string &value) final;
87
+ void WriteValue(const char *value) final;
88
+ void WriteValue(bool value) final;
89
+ void WriteDataPtr(const_data_ptr_t ptr, idx_t count) final;
90
+ };
91
+
92
+ } // namespace duckdb
@@ -10,7 +10,7 @@
10
10
 
11
11
  #include "duckdb/common/field_writer.hpp"
12
12
  #include "duckdb/common/serializer.hpp"
13
- #include "duckdb/common/serializer/enum_serializer.hpp"
13
+ #include "duckdb/common/enum_util.hpp"
14
14
  #include "duckdb/common/serializer/serialization_traits.hpp"
15
15
  #include "duckdb/common/types/interval.hpp"
16
16
  #include "duckdb/common/types/string_type.hpp"
@@ -294,8 +294,12 @@ private:
294
294
  // Deserialize an Enum
295
295
  template <typename T = void>
296
296
  inline typename std::enable_if<std::is_enum<T>::value, T>::type Read() {
297
- auto str = ReadString();
298
- return EnumSerializer::StringToEnum<T>(str.c_str());
297
+ if (deserialize_enum_from_string) {
298
+ auto str = ReadString();
299
+ return EnumUtil::FromString<T>(str.c_str());
300
+ } else {
301
+ return (T)Read<typename std::underlying_type<T>::type>();
302
+ }
299
303
  }
300
304
 
301
305
  // Deserialize an interval_t
@@ -10,7 +10,7 @@
10
10
 
11
11
  #include "duckdb/common/field_writer.hpp"
12
12
  #include "duckdb/common/serializer.hpp"
13
- #include "duckdb/common/serializer/enum_serializer.hpp"
13
+ #include "duckdb/common/enum_util.hpp"
14
14
  #include "duckdb/common/serializer/serialization_traits.hpp"
15
15
  #include "duckdb/common/types/interval.hpp"
16
16
  #include "duckdb/common/types/string_type.hpp"
@@ -39,7 +39,7 @@ public:
39
39
  SetTag(tag);
40
40
  if (serialize_enum_as_string) {
41
41
  // Use the enum serializer to lookup tostring function
42
- auto str = EnumSerializer::EnumToString(value);
42
+ auto str = EnumUtil::ToChars(value);
43
43
  WriteValue(str);
44
44
  } else {
45
45
  // Use the underlying type
@@ -229,7 +229,7 @@ private:
229
229
  //! The number of entries stored in the TupleDataCollection
230
230
  idx_t count;
231
231
  //! The data segments of the TupleDataCollection
232
- vector<TupleDataSegment> segments;
232
+ unsafe_vector<TupleDataSegment> segments;
233
233
  //! The set of scatter functions
234
234
  vector<TupleDataScatterFunction> scatter_functions;
235
235
  //! The set of gather functions
@@ -109,7 +109,7 @@ public:
109
109
  //! The allocator for this segment
110
110
  shared_ptr<TupleDataAllocator> allocator;
111
111
  //! The chunks of this segment
112
- vector<TupleDataChunk> chunks;
112
+ unsafe_vector<TupleDataChunk> chunks;
113
113
  //! The tuple count of this segment
114
114
  idx_t count;
115
115
 
@@ -457,6 +457,7 @@ struct AggregateStateType {
457
457
  DUCKDB_API static const aggregate_state_t &GetStateType(const LogicalType &type);
458
458
  };
459
459
 
460
+ // **DEPRECATED**: Use EnumUtil directly instead.
460
461
  DUCKDB_API string LogicalTypeIdToString(LogicalTypeId type);
461
462
 
462
463
  DUCKDB_API LogicalTypeId TransformStringToLogicalTypeId(const string &str);
@@ -10,22 +10,31 @@
10
10
 
11
11
  #include "duckdb/common/assert.hpp"
12
12
  #include "duckdb/common/typedefs.hpp"
13
+ #include "duckdb/common/likely.hpp"
14
+ #include "duckdb/common/exception.hpp"
13
15
  #include <vector>
14
16
 
15
17
  namespace duckdb {
16
18
 
17
- // TODO: inline this, needs changes to 'exception.hpp' and other headers to avoid circular dependency
18
- void AssertIndexInBounds(idx_t index, idx_t size);
19
-
20
- template <class _Tp, class _Allocator = std::allocator<_Tp>>
21
- class vector : public std::vector<_Tp, _Allocator> {
19
+ template <class _Tp, bool SAFE = true>
20
+ class vector : public std::vector<_Tp, std::allocator<_Tp>> {
22
21
  public:
23
- using original = std::vector<_Tp, _Allocator>;
22
+ using original = std::vector<_Tp, std::allocator<_Tp>>;
24
23
  using original::original;
25
24
  using size_type = typename original::size_type;
26
25
  using const_reference = typename original::const_reference;
27
26
  using reference = typename original::reference;
28
27
 
28
+ static inline void AssertIndexInBounds(idx_t index, idx_t size) {
29
+ #if defined(DUCKDB_DEBUG_NO_SAFETY) || defined(DUCKDB_CLANG_TIDY)
30
+ return;
31
+ #else
32
+ if (DUCKDB_UNLIKELY(index >= size)) {
33
+ throw InternalException("Attempted to access index %ld within vector of size %ld", index, size);
34
+ }
35
+ #endif
36
+ }
37
+
29
38
  #ifdef DUCKDB_CLANG_TIDY
30
39
  // This is necessary to tell clang-tidy that it reinitializes the variable after a move
31
40
  [[clang::reinitializes]]
@@ -40,19 +49,57 @@ public:
40
49
  vector() = default;
41
50
  vector(original &&other) : original(std::move(other)) {
42
51
  }
52
+ template <bool _SAFE>
53
+ vector(vector<_Tp, _SAFE> &&other) : original(std::move(other)) {
54
+ }
43
55
 
44
- typename original::reference operator[](typename original::size_type __n) {
45
- #ifdef DEBUG
46
- AssertIndexInBounds(__n, original::size());
47
- #endif
56
+ template <bool _SAFE = false>
57
+ inline typename original::reference get(typename original::size_type __n) {
58
+ if (_SAFE) {
59
+ AssertIndexInBounds(__n, original::size());
60
+ }
48
61
  return original::operator[](__n);
49
62
  }
50
- typename original::const_reference operator[](typename original::size_type __n) const {
51
- #ifdef DEBUG
52
- AssertIndexInBounds(__n, original::size());
53
- #endif
63
+
64
+ template <bool _SAFE = false>
65
+ inline typename original::const_reference get(typename original::size_type __n) const {
66
+ if (_SAFE) {
67
+ AssertIndexInBounds(__n, original::size());
68
+ }
54
69
  return original::operator[](__n);
55
70
  }
71
+
72
+ typename original::reference operator[](typename original::size_type __n) {
73
+ return get<SAFE>(__n);
74
+ }
75
+ typename original::const_reference operator[](typename original::size_type __n) const {
76
+ return get<SAFE>(__n);
77
+ }
78
+
79
+ typename original::reference front() {
80
+ return get<SAFE>(0);
81
+ }
82
+
83
+ typename original::const_reference front() const {
84
+ return get<SAFE>(0);
85
+ }
86
+
87
+ typename original::reference back() {
88
+ if (original::empty()) {
89
+ throw InternalException("'back' called on an empty vector!");
90
+ }
91
+ return get<SAFE>(original::size() - 1);
92
+ }
93
+
94
+ typename original::const_reference back() const {
95
+ if (original::empty()) {
96
+ throw InternalException("'back' called on an empty vector!");
97
+ }
98
+ return get<SAFE>(original::size() - 1);
99
+ }
56
100
  };
57
101
 
102
+ template <typename T>
103
+ using unsafe_vector = vector<T, false>;
104
+
58
105
  } // namespace duckdb
@@ -102,9 +102,10 @@ public:
102
102
  //! Add the given data to the HT, computing the aggregates grouped by the
103
103
  //! data in the group chunk. When resize = true, aggregates will not be
104
104
  //! computed but instead just assigned.
105
- idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload, const vector<idx_t> &filter);
105
+ idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload,
106
+ const unsafe_vector<idx_t> &filter);
106
107
  idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, Vector &group_hashes, DataChunk &payload,
107
- const vector<idx_t> &filter);
108
+ const unsafe_vector<idx_t> &filter);
108
109
  idx_t AddChunk(AggregateHTAppendState &state, DataChunk &groups, DataChunk &payload, AggregateType filter);
109
110
 
110
111
  //! Scan the HT starting from the scan_position until the result and group
@@ -21,7 +21,7 @@ public:
21
21
 
22
22
  public:
23
23
  // The indices of the aggregates that are distinct
24
- vector<idx_t> indices;
24
+ unsafe_vector<idx_t> indices;
25
25
  // The amount of radix_tables that are occupied
26
26
  idx_t table_count;
27
27
  //! Occupied tables, not equal to indices if aggregates share input data
@@ -34,7 +34,7 @@ public:
34
34
 
35
35
  public:
36
36
  static unique_ptr<DistinctAggregateCollectionInfo> Create(vector<unique_ptr<Expression>> &aggregates);
37
- const vector<idx_t> &Indices() const;
37
+ const unsafe_vector<idx_t> &Indices() const;
38
38
  bool AnyDistinct() const;
39
39
 
40
40
  private:
@@ -42,7 +42,7 @@ public:
42
42
  const vector<vector<idx_t>> &GetGroupingFunctions() const;
43
43
 
44
44
  void InitializeGroupby(vector<unique_ptr<Expression>> groups, vector<unique_ptr<Expression>> expressions,
45
- vector<vector<idx_t>> grouping_functions);
45
+ vector<unsafe_vector<idx_t>> grouping_functions);
46
46
 
47
47
  //! Initialize a GroupedAggregateData object for use with distinct aggregates
48
48
  void InitializeDistinct(const unique_ptr<Expression> &aggregate, const vector<unique_ptr<Expression>> *groups_p);
@@ -50,7 +50,7 @@ public:
50
50
  private:
51
51
  void InitializeDistinctGroups(const vector<unique_ptr<Expression>> *groups);
52
52
  void InitializeGroupbyGroups(vector<unique_ptr<Expression>> groups);
53
- void SetGroupingFunctions(vector<vector<idx_t>> &functions);
53
+ void SetGroupingFunctions(vector<unsafe_vector<idx_t>> &functions);
54
54
  };
55
55
 
56
56
  } // namespace duckdb
@@ -67,7 +67,7 @@ public:
67
67
  vector<unique_ptr<Expression>> groups, idx_t estimated_cardinality);
68
68
  PhysicalHashAggregate(ClientContext &context, vector<LogicalType> types, vector<unique_ptr<Expression>> expressions,
69
69
  vector<unique_ptr<Expression>> groups, vector<GroupingSet> grouping_sets,
70
- vector<vector<idx_t>> grouping_functions, idx_t estimated_cardinality);
70
+ vector<unsafe_vector<idx_t>> grouping_functions, idx_t estimated_cardinality);
71
71
 
72
72
  //! The grouping sets
73
73
  GroupedAggregateData grouped_aggregate_data;
@@ -80,8 +80,8 @@ public:
80
80
  vector<LogicalType> input_group_types;
81
81
 
82
82
  // Filters given to Sink and friends
83
- vector<idx_t> non_distinct_filter;
84
- vector<idx_t> distinct_filter;
83
+ unsafe_vector<idx_t> non_distinct_filter;
84
+ unsafe_vector<idx_t> distinct_filter;
85
85
 
86
86
  unordered_map<Expression *, size_t> filter_indexes;
87
87
 
@@ -0,0 +1,68 @@
1
+ //===----------------------------------------------------------------------===//
2
+ // DuckDB
3
+ //
4
+ // duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp
5
+ //
6
+ //
7
+ //===----------------------------------------------------------------------===//
8
+
9
+ #pragma once
10
+
11
+ #include "duckdb/execution/physical_operator.hpp"
12
+ #include "duckdb/parser/parsed_data/copy_info.hpp"
13
+ #include "duckdb/function/copy_function.hpp"
14
+ #include "duckdb/common/file_system.hpp"
15
+ #include "duckdb/common/filename_pattern.hpp"
16
+
17
+ namespace duckdb {
18
+
19
+ //! Copy the contents of a query into a file (this operator requires batch indexes)
20
+ class PhysicalBatchCopyToFile : public PhysicalOperator {
21
+ public:
22
+ static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::BATCH_COPY_TO_FILE;
23
+
24
+ public:
25
+ PhysicalBatchCopyToFile(vector<LogicalType> types, CopyFunction function, unique_ptr<FunctionData> bind_data,
26
+ idx_t estimated_cardinality);
27
+
28
+ CopyFunction function;
29
+ unique_ptr<FunctionData> bind_data;
30
+ string file_path;
31
+ bool use_tmp_file;
32
+
33
+ public:
34
+ // Source interface
35
+ SourceResultType GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const override;
36
+
37
+ bool IsSource() const override {
38
+ return true;
39
+ }
40
+
41
+ public:
42
+ // Sink interface
43
+ SinkResultType Sink(ExecutionContext &context, DataChunk &chunk, OperatorSinkInput &input) const override;
44
+ void Combine(ExecutionContext &context, GlobalSinkState &gstate, LocalSinkState &lstate) const override;
45
+ SinkFinalizeType Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
46
+ GlobalSinkState &gstate) const override;
47
+ unique_ptr<LocalSinkState> GetLocalSinkState(ExecutionContext &context) const override;
48
+ unique_ptr<GlobalSinkState> GetGlobalSinkState(ClientContext &context) const override;
49
+ void NextBatch(ExecutionContext &context, GlobalSinkState &state, LocalSinkState &lstate_p) const override;
50
+
51
+ bool RequiresBatchIndex() const override {
52
+ return true;
53
+ }
54
+
55
+ bool IsSink() const override {
56
+ return true;
57
+ }
58
+
59
+ bool ParallelSink() const override {
60
+ return true;
61
+ }
62
+
63
+ private:
64
+ void PrepareBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t batch_index,
65
+ unique_ptr<ColumnDataCollection> collection) const;
66
+ void FlushBatchData(ClientContext &context, GlobalSinkState &gstate_p, idx_t min_index) const;
67
+ };
68
+ } // namespace duckdb
@@ -67,5 +67,7 @@ public:
67
67
  bool ParallelSink() const override {
68
68
  return per_thread_output || partition_output || parallel;
69
69
  }
70
+
71
+ static void MoveTmpFile(ClientContext &context, const string &tmp_file_path);
70
72
  };
71
73
  } // namespace duckdb