duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
  4. package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
  5. package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
  6. package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
  7. package/src/duckdb/extension/json/json_scan.cpp +1 -1
  8. package/src/duckdb/extension/json/json_serializer.cpp +26 -69
  9. package/src/duckdb/src/common/enum_util.cpp +119 -7
  10. package/src/duckdb/src/common/extra_type_info.cpp +7 -3
  11. package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
  12. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
  13. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
  14. package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
  15. package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
  16. package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
  17. package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
  18. package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
  19. package/src/duckdb/src/common/types/interval.cpp +3 -0
  20. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
  21. package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
  22. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
  23. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
  24. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
  25. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
  26. package/src/duckdb/src/common/types/value.cpp +63 -42
  27. package/src/duckdb/src/common/types/vector.cpp +33 -67
  28. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
  29. package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
  30. package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
  31. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
  32. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
  33. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
  34. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
  36. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
  37. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
  38. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
  39. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
  40. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
  41. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
  42. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
  43. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
  44. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
  45. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
  46. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
  47. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
  48. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
  49. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  50. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
  51. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
  52. package/src/duckdb/src/execution/window_executor.cpp +6 -5
  53. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
  54. package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
  55. package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
  56. package/src/duckdb/src/function/table/read_csv.cpp +150 -136
  57. package/src/duckdb/src/function/table/table_scan.cpp +0 -2
  58. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  59. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
  60. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
  61. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
  62. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
  63. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
  64. package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
  65. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
  66. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
  67. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
  68. package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
  69. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
  70. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
  71. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
  72. package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
  73. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
  74. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
  75. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
  76. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
  77. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  78. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
  79. package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
  80. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
  81. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
  82. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
  83. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
  84. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
  85. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
  86. package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
  88. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
  89. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
  90. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
  91. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
  92. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
  93. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
  94. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
  95. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
  97. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
  98. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
  99. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
  100. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
  101. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
  102. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
  103. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
  104. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
  105. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
  106. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  107. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
  108. package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
  110. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  111. package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
  112. package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
  113. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
  114. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
  115. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
  116. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
  117. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
  119. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
  120. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
  121. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
  122. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
  123. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  124. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
  125. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  126. package/src/duckdb/src/include/duckdb.h +12 -0
  127. package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
  128. package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
  129. package/src/duckdb/src/main/client_verify.cpp +1 -0
  130. package/src/duckdb/src/main/config.cpp +2 -2
  131. package/src/duckdb/src/main/connection.cpp +3 -3
  132. package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
  133. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
  134. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
  135. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
  136. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
  137. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
  138. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
  139. package/src/duckdb/src/planner/logical_operator.cpp +1 -1
  140. package/src/duckdb/src/planner/planner.cpp +1 -1
  141. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  142. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
  143. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
  144. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
  145. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
  146. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
  147. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
  148. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
  149. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
  150. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
  151. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
  152. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
  153. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
  154. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
  155. package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
  156. package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
  157. package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
  158. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
  159. package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
  160. package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
  161. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  162. package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
  163. package/src/duckdb/src/storage/table/row_group.cpp +68 -1
  164. package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
  165. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  166. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
  167. package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
  168. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
  169. package/src/duckdb/ub_src_execution.cpp +0 -2
  170. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
  171. package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
  172. package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
  173. package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
  174. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  175. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  176. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  177. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  178. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
  179. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
  180. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
@@ -8,19 +8,19 @@
8
8
 
9
9
  #pragma once
10
10
 
11
- #include "duckdb/execution/operator/persistent/csv_reader_options.hpp"
11
+ #include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
12
12
  #include "duckdb/main/relation/table_function_relation.hpp"
13
13
 
14
14
  namespace duckdb {
15
15
 
16
- struct BufferedCSVReaderOptions;
16
+ struct CSVReaderOptions;
17
17
 
18
18
  class ReadCSVRelation : public TableFunctionRelation {
19
19
  public:
20
- ReadCSVRelation(const std::shared_ptr<ClientContext> &context, const string &csv_file,
21
- vector<ColumnDefinition> columns, string alias = string());
22
- ReadCSVRelation(const std::shared_ptr<ClientContext> &context, const string &csv_file,
23
- BufferedCSVReaderOptions options, string alias = string());
20
+ ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, vector<ColumnDefinition> columns,
21
+ string alias = string());
22
+ ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file, CSVReaderOptions options,
23
+ string alias = string());
24
24
 
25
25
  string alias;
26
26
  bool auto_detect;
@@ -18,7 +18,7 @@ class Task;
18
18
 
19
19
  class Event : public std::enable_shared_from_this<Event> {
20
20
  public:
21
- Event(Executor &executor);
21
+ explicit Event(Executor &executor);
22
22
  virtual ~Event() = default;
23
23
 
24
24
  public:
@@ -52,6 +52,17 @@ public:
52
52
  virtual void PrintPipeline() {
53
53
  }
54
54
 
55
+ template <class TARGET>
56
+ TARGET &Cast() {
57
+ D_ASSERT(dynamic_cast<TARGET *>(this));
58
+ return reinterpret_cast<TARGET &>(*this);
59
+ }
60
+ template <class TARGET>
61
+ const TARGET &Cast() const {
62
+ D_ASSERT(dynamic_cast<const TARGET *>(this));
63
+ return reinterpret_cast<const TARGET &>(*this);
64
+ }
65
+
55
66
  protected:
56
67
  Executor &executor;
57
68
  //! The current threads working on the event
@@ -14,6 +14,9 @@
14
14
 
15
15
  namespace duckdb {
16
16
 
17
+ class FormatSerializer;
18
+ class FormatDeserializer;
19
+
17
20
  class Block : public FileBuffer {
18
21
  public:
19
22
  Block(Allocator &allocator, block_id_t id);
@@ -51,6 +54,9 @@ struct MetaBlockPointer {
51
54
  }
52
55
  block_id_t GetBlockId();
53
56
  uint32_t GetBlockIndex();
57
+
58
+ void FormatSerialize(FormatSerializer &serializer) const;
59
+ static MetaBlockPointer FormatDeserialize(FormatDeserializer &source);
54
60
  };
55
61
 
56
62
  } // namespace duckdb
@@ -97,6 +97,9 @@ public:
97
97
  inline const idx_t &GetMemoryUsage() const {
98
98
  return memory_usage;
99
99
  }
100
+ bool IsUnloaded() {
101
+ return state == BlockState::BLOCK_UNLOADED;
102
+ }
100
103
 
101
104
  private:
102
105
  static BufferHandle Load(shared_ptr<BlockHandle> &handle, unique_ptr<FileBuffer> buffer = nullptr);
@@ -19,6 +19,9 @@
19
19
  namespace duckdb {
20
20
  struct SelectionVector;
21
21
 
22
+ class FormatSerializer;
23
+ class FormatDeserializer;
24
+
22
25
  class Serializer;
23
26
  class Deserializer;
24
27
  class FieldWriter;
@@ -94,17 +97,18 @@ public:
94
97
 
95
98
  void Serialize(Serializer &serializer) const;
96
99
  void Serialize(FieldWriter &writer) const;
97
-
98
- idx_t GetDistinctCount();
99
-
100
100
  static BaseStatistics Deserialize(Deserializer &source, LogicalType type);
101
101
 
102
+ void FormatSerialize(FormatSerializer &serializer) const;
103
+ static BaseStatistics FormatDeserialize(FormatDeserializer &deserializer);
104
+
102
105
  //! Verify that a vector does not violate the statistics
103
106
  void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const;
104
107
  void Verify(Vector &vector, idx_t count) const;
105
108
 
106
109
  string ToString() const;
107
110
 
111
+ idx_t GetDistinctCount();
108
112
  static BaseStatistics FromConstant(const Value &input);
109
113
 
110
114
  private:
@@ -12,6 +12,7 @@
12
12
  #include "duckdb/storage/statistics/distinct_statistics.hpp"
13
13
 
14
14
  namespace duckdb {
15
+ class FormatSerializer;
15
16
 
16
17
  class ColumnStatistics {
17
18
  public:
@@ -35,6 +36,9 @@ public:
35
36
  void Serialize(Serializer &serializer) const;
36
37
  static shared_ptr<ColumnStatistics> Deserialize(Deserializer &source, const LogicalType &type);
37
38
 
39
+ void FormatSerialize(FormatSerializer &serializer) const;
40
+ static shared_ptr<ColumnStatistics> FormatDeserialize(FormatDeserializer &source);
41
+
38
42
  private:
39
43
  BaseStatistics stats;
40
44
  //! The approximate count distinct stats of the column
@@ -16,6 +16,8 @@ namespace duckdb {
16
16
  class Serializer;
17
17
  class Deserializer;
18
18
  class Vector;
19
+ class FormatSerializer;
20
+ class FormatDeserializer;
19
21
 
20
22
  class DistinctStatistics {
21
23
  public:
@@ -48,6 +50,9 @@ public:
48
50
 
49
51
  static bool TypeIsSupported(const LogicalType &type);
50
52
 
53
+ void FormatSerialize(FormatSerializer &serializer) const;
54
+ static unique_ptr<DistinctStatistics> FormatDeserialize(FormatDeserializer &deserializer);
55
+
51
56
  private:
52
57
  //! For distinct statistics we sample the input to speed up insertions
53
58
  static constexpr const double SAMPLE_RATE = 0.1;
@@ -31,6 +31,9 @@ struct ListStats {
31
31
  DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
32
32
  DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
33
33
 
34
+ DUCKDB_API static void FormatSerialize(const BaseStatistics &stats, FormatSerializer &serializer);
35
+ DUCKDB_API static BaseStatistics FormatDeserialize(FormatDeserializer &deserializer, LogicalType type);
36
+
34
37
  DUCKDB_API static string ToString(const BaseStatistics &stats);
35
38
 
36
39
  DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other);
@@ -64,6 +64,9 @@ struct NumericStats {
64
64
  DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
65
65
  DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
66
66
 
67
+ DUCKDB_API static void FormatSerialize(const BaseStatistics &stats, FormatSerializer &serializer);
68
+ DUCKDB_API static BaseStatistics FormatDeserialize(FormatDeserializer &deserializer, LogicalType type);
69
+
67
70
  DUCKDB_API static string ToString(const BaseStatistics &stats);
68
71
 
69
72
  template <class T>
@@ -61,6 +61,9 @@ struct StringStats {
61
61
  DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
62
62
  DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
63
63
 
64
+ DUCKDB_API static void FormatSerialize(const BaseStatistics &stats, FormatSerializer &serializer);
65
+ DUCKDB_API static BaseStatistics FormatDeserialize(FormatDeserializer &deserializer, LogicalType type);
66
+
64
67
  DUCKDB_API static string ToString(const BaseStatistics &stats);
65
68
 
66
69
  DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type,
@@ -32,6 +32,9 @@ struct StructStats {
32
32
  DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer);
33
33
  DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type);
34
34
 
35
+ DUCKDB_API static void FormatSerialize(const BaseStatistics &stats, FormatSerializer &serializer);
36
+ DUCKDB_API static BaseStatistics FormatDeserialize(FormatDeserializer &deserializer, LogicalType type);
37
+
35
38
  DUCKDB_API static string ToString(const BaseStatistics &stats);
36
39
 
37
40
  DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other);
@@ -18,6 +18,9 @@ struct SelectionVector;
18
18
  class Transaction;
19
19
  struct TransactionData;
20
20
 
21
+ class FormatSerializer;
22
+ class FormatDeserializer;
23
+
21
24
  enum class ChunkInfoType : uint8_t { CONSTANT_INFO, VECTOR_INFO, EMPTY_INFO };
22
25
 
23
26
  class ChunkInfo {
@@ -46,6 +49,9 @@ public:
46
49
  virtual void Serialize(Serializer &serialize) = 0;
47
50
  static unique_ptr<ChunkInfo> Deserialize(Deserializer &source);
48
51
 
52
+ virtual void FormatSerialize(FormatSerializer &serializer) const = 0;
53
+ static unique_ptr<ChunkInfo> FormatDeserialize(FormatDeserializer &deserializer);
54
+
49
55
  public:
50
56
  template <class TARGET>
51
57
  TARGET &Cast() {
@@ -85,10 +91,13 @@ public:
85
91
  void Serialize(Serializer &serialize) override;
86
92
  static unique_ptr<ChunkInfo> Deserialize(Deserializer &source);
87
93
 
94
+ void FormatSerialize(FormatSerializer &serializer) const override;
95
+ static unique_ptr<ChunkInfo> FormatDeserialize(FormatDeserializer &deserializer);
96
+
88
97
  private:
89
98
  template <class OP>
90
99
  idx_t TemplatedGetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector,
91
- idx_t max_count);
100
+ idx_t max_count) const;
92
101
  };
93
102
 
94
103
  class ChunkVectorInfo : public ChunkInfo {
@@ -109,7 +118,7 @@ public:
109
118
 
110
119
  public:
111
120
  idx_t GetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector,
112
- idx_t max_count);
121
+ idx_t max_count) const;
113
122
  idx_t GetSelVector(TransactionData transaction, SelectionVector &sel_vector, idx_t max_count) override;
114
123
  idx_t GetCommittedSelVector(transaction_t min_start_id, transaction_t min_transaction_id,
115
124
  SelectionVector &sel_vector, idx_t max_count) override;
@@ -130,10 +139,13 @@ public:
130
139
  void Serialize(Serializer &serialize) override;
131
140
  static unique_ptr<ChunkInfo> Deserialize(Deserializer &source);
132
141
 
142
+ void FormatSerialize(FormatSerializer &serializer) const override;
143
+ static unique_ptr<ChunkInfo> FormatDeserialize(FormatDeserializer &deserializer);
144
+
133
145
  private:
134
146
  template <class OP>
135
147
  idx_t TemplatedGetSelVector(transaction_t start_time, transaction_t transaction_id, SelectionVector &sel_vector,
136
- idx_t max_count);
148
+ idx_t max_count) const;
137
149
  };
138
150
 
139
151
  } // namespace duckdb
@@ -147,6 +147,10 @@ public:
147
147
 
148
148
  void NextVector(CollectionScanState &state);
149
149
 
150
+ // Serialization
151
+ static void FormatSerialize(RowGroupPointer &pointer, FormatSerializer &serializer);
152
+ static RowGroupPointer FormatDeserialize(FormatDeserializer &deserializer);
153
+
150
154
  private:
151
155
  ChunkInfo *GetChunkInfo(idx_t vector_idx);
152
156
  ColumnData &GetColumn(storage_t c);
@@ -16,6 +16,8 @@
16
16
  namespace duckdb {
17
17
  class ColumnList;
18
18
  class PersistentTableData;
19
+ class FormatSerializer;
20
+ class FormatDeserializer;
19
21
 
20
22
  class TableStatisticsLock {
21
23
  public:
@@ -50,6 +52,9 @@ public:
50
52
  void Serialize(Serializer &serializer);
51
53
  void Deserialize(Deserializer &source, ColumnList &columns);
52
54
 
55
+ void FormatSerialize(FormatSerializer &serializer);
56
+ void FormatDeserialize(FormatDeserializer &deserializer, ColumnList &columns);
57
+
53
58
  private:
54
59
  //! The statistics lock
55
60
  mutex stats_lock;
@@ -23,4 +23,10 @@ public:
23
23
  static unique_ptr<StatementVerifier> Create(const SQLStatement &statement);
24
24
  };
25
25
 
26
+ class DeserializedStatementVerifierNoDefaultV2 : public StatementVerifier {
27
+ public:
28
+ explicit DeserializedStatementVerifierNoDefaultV2(unique_ptr<SQLStatement> statement_p);
29
+ static unique_ptr<StatementVerifier> Create(const SQLStatement &statement);
30
+ };
31
+
26
32
  } // namespace duckdb
@@ -19,6 +19,7 @@ enum class VerificationType : uint8_t {
19
19
  COPIED,
20
20
  DESERIALIZED,
21
21
  DESERIALIZED_V2,
22
+ DESERIALIZED_V2_NO_DEFAULT,
22
23
  PARSED,
23
24
  UNOPTIMIZED,
24
25
  NO_OPERATOR_CACHING,
@@ -1372,6 +1372,18 @@ The resulting type should be destroyed with `duckdb_destroy_logical_type`.
1372
1372
  DUCKDB_API duckdb_logical_type duckdb_create_union_type(duckdb_logical_type member_types, const char **member_names,
1373
1373
  idx_t member_count);
1374
1374
 
1375
+ /*!
1376
+ Creates a STRUCT type from the passed member name and type arrays.
1377
+ The resulting type should be destroyed with `duckdb_destroy_logical_type`.
1378
+
1379
+ * member_types: The array of types that the struct should consist of.
1380
+ * member_names: The array of names that the struct should consist of.
1381
+ * member_count: The number of members that were specified for both arrays.
1382
+ * returns: The logical type.
1383
+ */
1384
+ DUCKDB_API duckdb_logical_type duckdb_create_struct_type(duckdb_logical_type *member_types, const char **member_names,
1385
+ idx_t member_count);
1386
+
1375
1387
  /*!
1376
1388
  Creates a `duckdb_logical_type` of type decimal with the specified width and scale
1377
1389
  The resulting type should be destroyed with `duckdb_destroy_logical_type`.
@@ -51,6 +51,28 @@ duckdb_logical_type duckdb_create_union_type(duckdb_logical_type member_types_p,
51
51
  return reinterpret_cast<duckdb_logical_type>(mtype);
52
52
  }
53
53
 
54
+ duckdb_logical_type duckdb_create_struct_type(duckdb_logical_type *member_types_p, const char **member_names,
55
+ idx_t member_count) {
56
+ if (!member_types_p || !member_names) {
57
+ return nullptr;
58
+ }
59
+ duckdb::LogicalType **member_types = (duckdb::LogicalType **)member_types_p;
60
+ for (idx_t i = 0; i < member_count; i++) {
61
+ if (!member_names[i] || !member_types[i]) {
62
+ return nullptr;
63
+ }
64
+ }
65
+
66
+ duckdb::LogicalType *mtype = new duckdb::LogicalType;
67
+ duckdb::child_list_t<duckdb::LogicalType> members;
68
+
69
+ for (idx_t i = 0; i < member_count; i++) {
70
+ members.push_back(make_pair(member_names[i], *member_types[i]));
71
+ }
72
+ *mtype = duckdb::LogicalType::STRUCT(members);
73
+ return reinterpret_cast<duckdb_logical_type>(mtype);
74
+ }
75
+
54
76
  duckdb_logical_type duckdb_create_map_type(duckdb_logical_type key_type, duckdb_logical_type value_type) {
55
77
  if (!key_type || !value_type) {
56
78
  return nullptr;
@@ -1,5 +1,6 @@
1
1
  #include "duckdb/main/client_context_file_opener.hpp"
2
2
 
3
+ #include "duckdb/common/file_opener.hpp"
3
4
  #include "duckdb/main/client_context.hpp"
4
5
 
5
6
  namespace duckdb {
@@ -8,6 +9,11 @@ bool ClientContextFileOpener::TryGetCurrentSetting(const string &key, Value &res
8
9
  return context.TryGetCurrentSetting(key, result);
9
10
  }
10
11
 
12
+ // LCOV_EXCL_START
13
+ bool ClientContextFileOpener::TryGetCurrentSetting(const string &key, Value &result, FileOpenerInfo &) {
14
+ return context.TryGetCurrentSetting(key, result);
15
+ }
16
+
11
17
  ClientContext *FileOpener::TryGetClientContext(FileOpener *opener) {
12
18
  if (!opener) {
13
19
  return nullptr;
@@ -22,4 +28,15 @@ bool FileOpener::TryGetCurrentSetting(FileOpener *opener, const string &key, Val
22
28
  return opener->TryGetCurrentSetting(key, result);
23
29
  }
24
30
 
31
+ bool FileOpener::TryGetCurrentSetting(FileOpener *opener, const string &key, Value &result, FileOpenerInfo &info) {
32
+ if (!opener) {
33
+ return false;
34
+ }
35
+ return opener->TryGetCurrentSetting(key, result, info);
36
+ }
37
+
38
+ bool FileOpener::TryGetCurrentSetting(const string &key, Value &result, FileOpenerInfo &info) {
39
+ return this->TryGetCurrentSetting(key, result);
40
+ }
41
+ // LCOV_EXCL_STOP
25
42
  } // namespace duckdb
@@ -41,6 +41,7 @@ PreservedError ClientContext::VerifyQuery(ClientContextLock &lock, const string
41
41
  statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::COPIED, stmt));
42
42
  statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::DESERIALIZED, stmt));
43
43
  statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::DESERIALIZED_V2, stmt));
44
+ statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::DESERIALIZED_V2_NO_DEFAULT, stmt));
44
45
  statement_verifiers.emplace_back(StatementVerifier::Create(VerificationType::UNOPTIMIZED, stmt));
45
46
  prepared_statement_verifier = StatementVerifier::Create(VerificationType::PREPARED, stmt);
46
47
  #ifdef DUCKDB_DEBUG_ASYNC_SINK_SOURCE
@@ -285,7 +285,7 @@ idx_t CGroupBandwidthQuota(idx_t physical_cores, FileSystem &fs) {
285
285
  }
286
286
  }
287
287
 
288
- idx_t GetSystemMaxThreadsInternal(FileSystem &fs) {
288
+ idx_t DBConfig::GetSystemMaxThreads(FileSystem &fs) {
289
289
  #ifndef DUCKDB_NO_THREADS
290
290
  idx_t physical_cores = std::thread::hardware_concurrency();
291
291
  #ifdef __linux__
@@ -301,7 +301,7 @@ idx_t GetSystemMaxThreadsInternal(FileSystem &fs) {
301
301
 
302
302
  void DBConfig::SetDefaultMaxThreads() {
303
303
  #ifndef DUCKDB_NO_THREADS
304
- options.maximum_threads = GetSystemMaxThreadsInternal(*file_system);
304
+ options.maximum_threads = GetSystemMaxThreads(*file_system);
305
305
  #else
306
306
  options.maximum_threads = 1;
307
307
  #endif
@@ -1,7 +1,7 @@
1
1
  #include "duckdb/main/connection.hpp"
2
2
 
3
3
  #include "duckdb/common/types/column/column_data_collection.hpp"
4
- #include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
4
+ #include "duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp"
5
5
  #include "duckdb/function/table/read_csv.hpp"
6
6
  #include "duckdb/main/appender.hpp"
7
7
  #include "duckdb/main/client_context.hpp"
@@ -219,11 +219,11 @@ shared_ptr<Relation> Connection::Values(const string &values, const vector<strin
219
219
  }
220
220
 
221
221
  shared_ptr<Relation> Connection::ReadCSV(const string &csv_file) {
222
- BufferedCSVReaderOptions options;
222
+ CSVReaderOptions options;
223
223
  return ReadCSV(csv_file, options);
224
224
  }
225
225
 
226
- shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, BufferedCSVReaderOptions &options) {
226
+ shared_ptr<Relation> Connection::ReadCSV(const string &csv_file, CSVReaderOptions &options) {
227
227
  options.file_path = csv_file;
228
228
  options.auto_detect = true;
229
229
  return make_shared<ReadCSVRelation>(context, csv_file, options);
@@ -1,18 +1,21 @@
1
1
  #include "duckdb/main/relation/read_csv_relation.hpp"
2
- #include "duckdb/parser/tableref/table_function_ref.hpp"
3
- #include "duckdb/parser/tableref/basetableref.hpp"
4
- #include "duckdb/parser/query_node/select_node.hpp"
5
- #include "duckdb/parser/expression/star_expression.hpp"
2
+
3
+ #include "duckdb/common/string_util.hpp"
4
+ #include "duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp"
5
+ #include "duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp"
6
+ #include "duckdb/execution/operator/scan/csv/csv_sniffer.hpp"
6
7
  #include "duckdb/parser/expression/columnref_expression.hpp"
7
8
  #include "duckdb/parser/expression/comparison_expression.hpp"
8
9
  #include "duckdb/parser/expression/constant_expression.hpp"
9
10
  #include "duckdb/parser/expression/function_expression.hpp"
10
- #include "duckdb/common/string_util.hpp"
11
- #include "duckdb/execution/operator/persistent/buffered_csv_reader.hpp"
11
+ #include "duckdb/parser/expression/star_expression.hpp"
12
+ #include "duckdb/parser/query_node/select_node.hpp"
13
+ #include "duckdb/parser/tableref/basetableref.hpp"
14
+ #include "duckdb/parser/tableref/table_function_ref.hpp"
12
15
 
13
16
  namespace duckdb {
14
17
 
15
- ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, const string &csv_file,
18
+ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file,
16
19
  vector<ColumnDefinition> columns_p, string alias_p)
17
20
  : TableFunctionRelation(context, "read_csv", {Value(csv_file)}, nullptr, false), alias(std::move(alias_p)),
18
21
  auto_detect(false) {
@@ -31,8 +34,8 @@ ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context,
31
34
  AddNamedParameter("columns", Value::STRUCT(std::move(column_names)));
32
35
  }
33
36
 
34
- ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context, const string &csv_file,
35
- BufferedCSVReaderOptions options, string alias_p)
37
+ ReadCSVRelation::ReadCSVRelation(const shared_ptr<ClientContext> &context, const string &csv_file,
38
+ CSVReaderOptions options, string alias_p)
36
39
  : TableFunctionRelation(context, "read_csv_auto", {Value(csv_file)}, nullptr, false), alias(std::move(alias_p)),
37
40
  auto_detect(true) {
38
41
 
@@ -42,10 +45,13 @@ ReadCSVRelation::ReadCSVRelation(const std::shared_ptr<ClientContext> &context,
42
45
 
43
46
  // Force auto_detect for this constructor
44
47
  options.auto_detect = true;
45
- BufferedCSVReader reader(*context, std::move(options));
46
-
47
- auto &types = reader.GetTypes();
48
- auto &names = reader.GetNames();
48
+ auto bm_file_handle = BaseCSVReader::OpenCSV(*context, options);
49
+ auto buffer_manager = make_shared<CSVBufferManager>(*context, std::move(bm_file_handle), options);
50
+ CSVStateMachineCache state_machine_cache;
51
+ CSVSniffer sniffer(options, buffer_manager, state_machine_cache);
52
+ auto sniffer_result = sniffer.SniffCSV();
53
+ auto &types = sniffer_result.return_types;
54
+ auto &names = sniffer_result.names;
49
55
  for (idx_t i = 0; i < types.size(); i++) {
50
56
  columns.emplace_back(names[i], types[i]);
51
57
  }
@@ -51,7 +51,7 @@ private:
51
51
  //! Debugging state: number of times blocked
52
52
  int debug_blocked_count = 0;
53
53
  //! Number of times the Finalize will block before actually returning data
54
- int debug_blocked_target_count = 1;
54
+ int debug_blocked_target_count = 10;
55
55
  #endif
56
56
  };
57
57
 
@@ -2,8 +2,6 @@
2
2
 
3
3
  #include "duckdb/common/limits.hpp"
4
4
  #include "duckdb/common/field_writer.hpp"
5
- #include "duckdb/common/serializer/format_serializer.hpp"
6
- #include "duckdb/common/serializer/format_deserializer.hpp"
7
5
 
8
6
  namespace duckdb {
9
7
 
@@ -160,12 +158,6 @@ void PivotColumnEntry::Serialize(Serializer &serializer) const {
160
158
  writer.Finalize();
161
159
  }
162
160
 
163
- void PivotColumnEntry::FormatSerialize(FormatSerializer &serializer) const {
164
- serializer.WriteProperty(100, "values", values);
165
- serializer.WriteOptionalProperty(101, "star_expr", star_expr);
166
- serializer.WriteProperty(102, "alias", alias);
167
- }
168
-
169
161
  PivotColumnEntry PivotColumnEntry::Deserialize(Deserializer &source) {
170
162
  PivotColumnEntry result;
171
163
  FieldReader reader(source);
@@ -176,14 +168,6 @@ PivotColumnEntry PivotColumnEntry::Deserialize(Deserializer &source) {
176
168
  return result;
177
169
  }
178
170
 
179
- PivotColumnEntry PivotColumnEntry::FormatDeserialize(FormatDeserializer &source) {
180
- PivotColumnEntry result;
181
- source.ReadProperty(100, "values", result.values);
182
- source.ReadOptionalProperty(101, "star_expr", result.star_expr);
183
- source.ReadProperty(102, "alias", result.alias);
184
- return result;
185
- }
186
-
187
171
  //===--------------------------------------------------------------------===//
188
172
  // PivotRef
189
173
  //===--------------------------------------------------------------------===//
@@ -5,7 +5,7 @@
5
5
  #include "duckdb/common/bind_helpers.hpp"
6
6
  #include "duckdb/common/filename_pattern.hpp"
7
7
  #include "duckdb/common/local_file_system.hpp"
8
- #include "duckdb/execution/operator/persistent/parallel_csv_reader.hpp"
8
+ #include "duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp"
9
9
  #include "duckdb/function/table/read_csv.hpp"
10
10
  #include "duckdb/main/client_context.hpp"
11
11
  #include "duckdb/main/database.hpp"