duckdb 0.8.2-dev3458.0 → 0.8.2-dev3949.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. package/binding.gyp +2 -0
  2. package/package.json +1 -1
  3. package/src/duckdb/extension/icu/icu_extension.cpp +5 -5
  4. package/src/duckdb/extension/json/include/json_deserializer.hpp +7 -16
  5. package/src/duckdb/extension/json/include/json_serializer.hpp +9 -15
  6. package/src/duckdb/extension/json/json_deserializer.cpp +29 -67
  7. package/src/duckdb/extension/json/json_scan.cpp +1 -1
  8. package/src/duckdb/extension/json/json_serializer.cpp +26 -69
  9. package/src/duckdb/src/common/enum_util.cpp +119 -7
  10. package/src/duckdb/src/common/extra_type_info.cpp +7 -3
  11. package/src/duckdb/src/common/radix_partitioning.cpp +8 -31
  12. package/src/duckdb/src/common/row_operations/row_aggregate.cpp +18 -3
  13. package/src/duckdb/src/common/serializer/binary_deserializer.cpp +62 -77
  14. package/src/duckdb/src/common/serializer/binary_serializer.cpp +84 -84
  15. package/src/duckdb/src/common/serializer/format_serializer.cpp +1 -1
  16. package/src/duckdb/src/common/sort/partition_state.cpp +41 -33
  17. package/src/duckdb/src/common/types/data_chunk.cpp +44 -8
  18. package/src/duckdb/src/common/types/hyperloglog.cpp +21 -0
  19. package/src/duckdb/src/common/types/interval.cpp +3 -0
  20. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +252 -126
  21. package/src/duckdb/src/common/types/row/row_layout.cpp +3 -31
  22. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +40 -32
  23. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +39 -26
  24. package/src/duckdb/src/common/types/row/tuple_data_layout.cpp +11 -1
  25. package/src/duckdb/src/common/types/row/tuple_data_segment.cpp +21 -16
  26. package/src/duckdb/src/common/types/value.cpp +63 -42
  27. package/src/duckdb/src/common/types/vector.cpp +33 -67
  28. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +3 -2
  29. package/src/duckdb/src/execution/aggregate_hashtable.cpp +222 -364
  30. package/src/duckdb/src/execution/join_hashtable.cpp +5 -6
  31. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +240 -310
  32. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +202 -173
  33. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +36 -2
  34. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/base_csv_reader.cpp +58 -162
  35. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +434 -0
  36. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +80 -0
  37. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +90 -0
  38. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +95 -0
  39. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/csv_reader_options.cpp +47 -28
  40. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +35 -0
  41. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +107 -0
  42. package/src/duckdb/src/execution/operator/{persistent → csv_scanner}/parallel_csv_reader.cpp +44 -44
  43. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +52 -0
  44. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +336 -0
  45. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +165 -0
  46. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +398 -0
  47. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_refinement.cpp +175 -0
  48. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_replacement.cpp +39 -0
  49. package/src/duckdb/src/execution/operator/join/physical_asof_join.cpp +1 -1
  50. package/src/duckdb/src/execution/operator/set/physical_recursive_cte.cpp +1 -2
  51. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +614 -574
  52. package/src/duckdb/src/execution/window_executor.cpp +6 -5
  53. package/src/duckdb/src/function/cast/cast_function_set.cpp +1 -0
  54. package/src/duckdb/src/function/scalar/strftime_format.cpp +4 -4
  55. package/src/duckdb/src/function/table/copy_csv.cpp +94 -96
  56. package/src/duckdb/src/function/table/read_csv.cpp +150 -136
  57. package/src/duckdb/src/function/table/table_scan.cpp +0 -2
  58. package/src/duckdb/src/function/table/version/pragma_version.cpp +2 -2
  59. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +24 -0
  60. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +9 -0
  61. package/src/duckdb/src/include/duckdb/common/fixed_size_map.hpp +208 -0
  62. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +3 -0
  63. package/src/duckdb/src/include/duckdb/common/perfect_map_set.hpp +2 -1
  64. package/src/duckdb/src/include/duckdb/common/printer.hpp +11 -0
  65. package/src/duckdb/src/include/duckdb/common/serializer/binary_deserializer.hpp +43 -30
  66. package/src/duckdb/src/include/duckdb/common/serializer/binary_serializer.hpp +36 -35
  67. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +18 -0
  68. package/src/duckdb/src/include/duckdb/common/serializer/encoding_util.hpp +132 -0
  69. package/src/duckdb/src/include/duckdb/common/serializer/format_deserializer.hpp +125 -150
  70. package/src/duckdb/src/include/duckdb/common/serializer/format_serializer.hpp +119 -107
  71. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +2 -1
  72. package/src/duckdb/src/include/duckdb/common/shared_ptr.hpp +8 -0
  73. package/src/duckdb/src/include/duckdb/common/sort/partition_state.hpp +13 -7
  74. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +5 -0
  75. package/src/duckdb/src/include/duckdb/common/types/hyperloglog.hpp +7 -1
  76. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +7 -0
  77. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +41 -9
  78. package/src/duckdb/src/include/duckdb/common/types/row/row_data_collection_scanner.hpp +5 -0
  79. package/src/duckdb/src/include/duckdb/common/types/row/row_layout.hpp +1 -23
  80. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +14 -8
  81. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +6 -3
  82. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_layout.hpp +7 -0
  83. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_segment.hpp +13 -8
  84. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_states.hpp +3 -2
  85. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +3 -3
  86. package/src/duckdb/src/include/duckdb/common/vector.hpp +2 -2
  87. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +125 -146
  88. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +5 -4
  89. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +4 -3
  90. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/base_csv_reader.hpp +17 -17
  91. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +72 -0
  92. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +110 -0
  93. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +103 -0
  94. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_file_handle.hpp +8 -15
  95. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_line_info.hpp +1 -1
  96. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/csv_reader_options.hpp +52 -28
  97. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +127 -0
  98. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +75 -0
  99. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +51 -0
  100. package/src/duckdb/src/include/duckdb/execution/operator/{persistent → scan/csv}/parallel_csv_reader.hpp +21 -27
  101. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +21 -0
  102. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +18 -27
  103. package/src/duckdb/src/include/duckdb/function/function_serialization.hpp +5 -6
  104. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +4 -4
  105. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +17 -12
  106. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  107. package/src/duckdb/src/include/duckdb/main/client_data.hpp +2 -1
  108. package/src/duckdb/src/include/duckdb/main/config.hpp +1 -0
  109. package/src/duckdb/src/include/duckdb/main/connection.hpp +2 -2
  110. package/src/duckdb/src/include/duckdb/main/relation/read_csv_relation.hpp +6 -6
  111. package/src/duckdb/src/include/duckdb/parallel/event.hpp +12 -1
  112. package/src/duckdb/src/include/duckdb/storage/block.hpp +6 -0
  113. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +3 -0
  114. package/src/duckdb/src/include/duckdb/storage/statistics/base_statistics.hpp +7 -3
  115. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +4 -0
  116. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -0
  117. package/src/duckdb/src/include/duckdb/storage/statistics/list_stats.hpp +3 -0
  118. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +3 -0
  119. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +3 -0
  120. package/src/duckdb/src/include/duckdb/storage/statistics/struct_stats.hpp +3 -0
  121. package/src/duckdb/src/include/duckdb/storage/table/chunk_info.hpp +15 -3
  122. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +4 -0
  123. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +5 -0
  124. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier_v2.hpp +6 -0
  125. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +1 -0
  126. package/src/duckdb/src/include/duckdb.h +12 -0
  127. package/src/duckdb/src/main/capi/logical_types-c.cpp +22 -0
  128. package/src/duckdb/src/main/client_context_file_opener.cpp +17 -0
  129. package/src/duckdb/src/main/client_verify.cpp +1 -0
  130. package/src/duckdb/src/main/config.cpp +2 -2
  131. package/src/duckdb/src/main/connection.cpp +3 -3
  132. package/src/duckdb/src/main/relation/read_csv_relation.cpp +19 -13
  133. package/src/duckdb/src/parallel/pipeline_finish_event.cpp +1 -1
  134. package/src/duckdb/src/parser/tableref/pivotref.cpp +0 -16
  135. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +1 -1
  136. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +41 -25
  137. package/src/duckdb/src/planner/expression/bound_aggregate_expression.cpp +4 -4
  138. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +10 -10
  139. package/src/duckdb/src/planner/logical_operator.cpp +1 -1
  140. package/src/duckdb/src/planner/planner.cpp +1 -1
  141. package/src/duckdb/src/storage/checkpoint_manager.cpp +4 -3
  142. package/src/duckdb/src/storage/serialization/serialize_constraint.cpp +1 -1
  143. package/src/duckdb/src/storage/serialization/serialize_create_info.cpp +5 -5
  144. package/src/duckdb/src/storage/serialization/serialize_expression.cpp +10 -10
  145. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +20 -20
  146. package/src/duckdb/src/storage/serialization/serialize_macro_function.cpp +2 -2
  147. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +118 -89
  148. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +3 -3
  149. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +27 -27
  150. package/src/duckdb/src/storage/serialization/serialize_query_node.cpp +16 -16
  151. package/src/duckdb/src/storage/serialization/serialize_result_modifier.cpp +8 -8
  152. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +1 -1
  153. package/src/duckdb/src/storage/serialization/serialize_storage.cpp +39 -0
  154. package/src/duckdb/src/storage/serialization/serialize_tableref.cpp +9 -9
  155. package/src/duckdb/src/storage/statistics/base_statistics.cpp +67 -4
  156. package/src/duckdb/src/storage/statistics/column_statistics.cpp +16 -0
  157. package/src/duckdb/src/storage/statistics/list_stats.cpp +21 -0
  158. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +126 -1
  159. package/src/duckdb/src/storage/statistics/string_stats.cpp +23 -0
  160. package/src/duckdb/src/storage/statistics/struct_stats.cpp +27 -0
  161. package/src/duckdb/src/storage/storage_info.cpp +1 -1
  162. package/src/duckdb/src/storage/table/chunk_info.cpp +82 -3
  163. package/src/duckdb/src/storage/table/row_group.cpp +68 -1
  164. package/src/duckdb/src/storage/table/table_statistics.cpp +21 -0
  165. package/src/duckdb/src/storage/wal_replay.cpp +2 -2
  166. package/src/duckdb/src/verification/deserialized_statement_verifier_v2.cpp +15 -1
  167. package/src/duckdb/src/verification/statement_verifier.cpp +2 -0
  168. package/src/duckdb/third_party/utf8proc/include/utf8proc_wrapper.hpp +8 -0
  169. package/src/duckdb/ub_src_execution.cpp +0 -2
  170. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +18 -0
  171. package/src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp +12 -0
  172. package/src/duckdb/ub_src_execution_operator_persistent.cpp +0 -12
  173. package/src/duckdb/ub_src_storage_serialization.cpp +2 -0
  174. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  175. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  176. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  177. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  178. package/src/duckdb/src/include/duckdb/execution/operator/persistent/buffered_csv_reader.hpp +0 -133
  179. package/src/duckdb/src/include/duckdb/execution/operator/persistent/csv_buffer.hpp +0 -74
  180. package/src/duckdb/src/include/duckdb/execution/partitionable_hashtable.hpp +0 -73
package/binding.gyp CHANGED
@@ -63,6 +63,8 @@
63
63
  "src/duckdb/ub_src_execution_index_art.cpp",
64
64
  "src/duckdb/ub_src_execution_nested_loop_join.cpp",
65
65
  "src/duckdb/ub_src_execution_operator_aggregate.cpp",
66
+ "src/duckdb/ub_src_execution_operator_csv_scanner.cpp",
67
+ "src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp",
66
68
  "src/duckdb/ub_src_execution_operator_filter.cpp",
67
69
  "src/duckdb/ub_src_execution_operator_helper.cpp",
68
70
  "src/duckdb/ub_src_execution_operator_join.cpp",
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "duckdb",
3
3
  "main": "./lib/duckdb.js",
4
4
  "types": "./lib/duckdb.d.ts",
5
- "version": "0.8.2-dev3458.0",
5
+ "version": "0.8.2-dev3949.0",
6
6
  "description": "DuckDB node.js API",
7
7
  "gypfile": true,
8
8
  "dependencies": {
@@ -43,13 +43,13 @@ struct IcuBindData : public FunctionData {
43
43
  UErrorCode status = U_ZERO_ERROR;
44
44
  auto locale = icu::Locale(language.c_str(), country.c_str());
45
45
  if (locale.isBogus()) {
46
- throw InternalException("Locale is bogus!?");
46
+ throw InvalidInputException("Locale is bogus!?");
47
47
  }
48
48
  this->collator = duckdb::unique_ptr<icu::Collator>(icu::Collator::createInstance(locale, status));
49
49
  if (U_FAILURE(status)) {
50
50
  auto error_name = u_errorName(status);
51
- throw InternalException("Failed to create ICU collator: %s (language: %s, country: %s)", error_name,
52
- language, country);
51
+ throw InvalidInputException("Failed to create ICU collator: %s (language: %s, country: %s)", error_name,
52
+ language, country);
53
53
  }
54
54
  }
55
55
 
@@ -113,7 +113,7 @@ static duckdb::unique_ptr<FunctionData> ICUCollateBind(ClientContext &context, S
113
113
  } else if (splits.size() == 2) {
114
114
  return make_uniq<IcuBindData>(splits[0], splits[1]);
115
115
  } else {
116
- throw InternalException("Expected one or two splits");
116
+ throw InvalidInputException("Expected one or two splits");
117
117
  }
118
118
  }
119
119
 
@@ -132,7 +132,7 @@ static duckdb::unique_ptr<FunctionData> ICUSortKeyBind(ClientContext &context, S
132
132
  } else if (splits.size() == 2) {
133
133
  return make_uniq<IcuBindData>(splits[0], splits[1]);
134
134
  } else {
135
- throw InternalException("Expected one or two splits");
135
+ throw InvalidInputException("Expected one or two splits");
136
136
  }
137
137
  }
138
138
 
@@ -47,28 +47,20 @@ private:
47
47
 
48
48
  void ThrowTypeError(yyjson_val *val, const char *expected);
49
49
 
50
- // Set the 'tag' of the property to read
51
- void SetTag(const field_id_t, const char *tag) final;
52
-
53
50
  //===--------------------------------------------------------------------===//
54
51
  // Nested Types Hooks
55
52
  //===--------------------------------------------------------------------===//
53
+ void OnPropertyBegin(const field_id_t field_id, const char *tag) final;
54
+ void OnPropertyEnd() final;
55
+ bool OnOptionalPropertyBegin(const field_id_t field_id, const char *tag) final;
56
+ void OnOptionalPropertyEnd(bool present) final;
57
+
56
58
  void OnObjectBegin() final;
57
59
  void OnObjectEnd() final;
58
60
  idx_t OnListBegin() final;
59
61
  void OnListEnd() final;
60
- idx_t OnMapBegin() final;
61
- void OnMapEnd() final;
62
- void OnMapEntryBegin() final;
63
- void OnMapEntryEnd() final;
64
- void OnMapKeyBegin() final;
65
- void OnMapValueBegin() final;
66
- bool OnOptionalBegin() final;
67
-
68
- void OnPairBegin() final;
69
- void OnPairKeyBegin() final;
70
- void OnPairValueBegin() final;
71
- void OnPairEnd() final;
62
+ bool OnNullableBegin() final;
63
+ void OnNullableEnd() final;
72
64
 
73
65
  //===--------------------------------------------------------------------===//
74
66
  // Primitive Types
@@ -85,7 +77,6 @@ private:
85
77
  float ReadFloat() final;
86
78
  double ReadDouble() final;
87
79
  string ReadString() final;
88
- interval_t ReadInterval() final;
89
80
  hugeint_t ReadHugeInt() final;
90
81
  void ReadDataPtr(data_ptr_t &ptr, idx_t count) final;
91
82
  };
@@ -27,6 +27,7 @@ private:
27
27
  explicit JsonSerializer(yyjson_mut_doc *doc, bool skip_if_null, bool skip_if_empty)
28
28
  : doc(doc), stack({yyjson_mut_obj(doc)}), skip_if_null(skip_if_null), skip_if_empty(skip_if_empty) {
29
29
  serialize_enum_as_string = true;
30
+ serialize_default_values = true;
30
31
  }
31
32
 
32
33
  public:
@@ -42,26 +43,20 @@ public:
42
43
  return stack.front();
43
44
  };
44
45
 
45
- void SetTag(const field_id_t, const char *tag) final;
46
-
47
46
  //===--------------------------------------------------------------------===//
48
47
  // Nested Types Hooks
49
48
  //===--------------------------------------------------------------------===//
50
- void OnOptionalBegin(bool present) final;
49
+ void OnPropertyBegin(const field_id_t field_id, const char *tag) final;
50
+ void OnPropertyEnd() final;
51
+ void OnOptionalPropertyBegin(const field_id_t field_id, const char *tag, bool present) final;
52
+ void OnOptionalPropertyEnd(bool present) final;
53
+
51
54
  void OnListBegin(idx_t count) final;
52
- void OnListEnd(idx_t count) final;
53
- void OnMapBegin(idx_t count) final;
54
- void OnMapEntryBegin() final;
55
- void OnMapEntryEnd() final;
56
- void OnMapKeyBegin() final;
57
- void OnMapValueBegin() final;
58
- void OnMapEnd(idx_t count) final;
55
+ void OnListEnd() final;
59
56
  void OnObjectBegin() final;
60
57
  void OnObjectEnd() final;
61
- void OnPairBegin() final;
62
- void OnPairKeyBegin() final;
63
- void OnPairValueBegin() final;
64
- void OnPairEnd() final;
58
+ void OnNullableBegin(bool present) final;
59
+ void OnNullableEnd() final;
65
60
 
66
61
  //===--------------------------------------------------------------------===//
67
62
  // Primitive Types
@@ -78,7 +73,6 @@ public:
78
73
  void WriteValue(hugeint_t value) final;
79
74
  void WriteValue(float value) final;
80
75
  void WriteValue(double value) final;
81
- void WriteValue(interval_t value) final;
82
76
  void WriteValue(const string_t value) final;
83
77
  void WriteValue(const string &value) final;
84
78
  void WriteValue(const char *value) final;
@@ -3,10 +3,25 @@
3
3
 
4
4
  namespace duckdb {
5
5
 
6
- void JsonDeserializer::SetTag(const field_id_t, const char *tag) {
6
+ void JsonDeserializer::OnPropertyBegin(const field_id_t, const char *tag) {
7
7
  current_tag = tag;
8
8
  }
9
9
 
10
+ void JsonDeserializer::OnPropertyEnd() {
11
+ }
12
+
13
+ bool JsonDeserializer::OnOptionalPropertyBegin(const field_id_t, const char *tag) {
14
+ auto parent = Current();
15
+ auto present = yyjson_obj_get(parent.val, tag) != nullptr;
16
+ if (present) {
17
+ current_tag = tag;
18
+ }
19
+ return present;
20
+ }
21
+
22
+ void JsonDeserializer::OnOptionalPropertyEnd(bool) {
23
+ }
24
+
10
25
  // If inside an object, return the value associated by the current tag (property name)
11
26
  // If inside an array, return the next element in the sequence
12
27
  yyjson_val *JsonDeserializer::GetNextValue() {
@@ -105,68 +120,29 @@ void JsonDeserializer::OnListEnd() {
105
120
  Pop();
106
121
  }
107
122
 
108
- // Deserialize maps as [ { key: ..., value: ... } ]
109
- idx_t JsonDeserializer::OnMapBegin() {
110
- auto val = GetNextValue();
111
- if (!yyjson_is_arr(val)) {
112
- ThrowTypeError(val, "array");
123
+ bool JsonDeserializer::OnNullableBegin() {
124
+ auto &parent_val = Current();
125
+ yyjson_arr_iter iter;
126
+ if (yyjson_is_arr(parent_val.val)) {
127
+ iter = parent_val.arr_iter;
113
128
  }
114
- Push(val);
115
- return yyjson_arr_size(val);
116
- }
117
-
118
- void JsonDeserializer::OnMapEntryBegin() {
119
129
  auto val = GetNextValue();
120
- if (!yyjson_is_obj(val)) {
121
- ThrowTypeError(val, "object");
122
- }
123
- Push(val);
124
- }
125
-
126
- void JsonDeserializer::OnMapKeyBegin() {
127
- SetTag(100, "key");
128
- }
129
-
130
- void JsonDeserializer::OnMapValueBegin() {
131
- SetTag(101, "value");
132
- }
133
130
 
134
- void JsonDeserializer::OnMapEntryEnd() {
135
- stack.pop_back();
136
- }
137
-
138
- void JsonDeserializer::OnMapEnd() {
139
- stack.pop_back();
140
- }
141
-
142
- void JsonDeserializer::OnPairBegin() {
143
- auto val = GetNextValue();
144
- if (!yyjson_is_obj(val)) {
145
- ThrowTypeError(val, "object");
131
+ // Recover the iterator if we are inside an array
132
+ if (yyjson_is_arr(parent_val.val)) {
133
+ parent_val.arr_iter = iter;
146
134
  }
147
- Push(val);
148
- }
149
-
150
- void JsonDeserializer::OnPairKeyBegin() {
151
- SetTag(100, "key");
152
- }
153
-
154
- void JsonDeserializer::OnPairValueBegin() {
155
- SetTag(101, "value");
156
- }
157
-
158
- void JsonDeserializer::OnPairEnd() {
159
- stack.pop_back();
160
- }
161
135
 
162
- bool JsonDeserializer::OnOptionalBegin() {
163
- auto val = GetNextValue();
164
136
  if (yyjson_is_null(val)) {
165
137
  return false;
166
138
  }
139
+
167
140
  return true;
168
141
  }
169
142
 
143
+ void JsonDeserializer::OnNullableEnd() {
144
+ }
145
+
170
146
  //===--------------------------------------------------------------------===//
171
147
  // Primitive Types
172
148
  //===--------------------------------------------------------------------===//
@@ -266,20 +242,6 @@ string JsonDeserializer::ReadString() {
266
242
  return yyjson_get_str(val);
267
243
  }
268
244
 
269
- interval_t JsonDeserializer::ReadInterval() {
270
- auto val = GetNextValue();
271
- if (!yyjson_is_obj(val)) {
272
- ThrowTypeError(val, "object");
273
- }
274
- Push(val);
275
- interval_t result;
276
- ReadProperty(100, "months", result.months);
277
- ReadProperty(101, "days", result.days);
278
- ReadProperty(102, "micros", result.micros);
279
- Pop();
280
- return result;
281
- }
282
-
283
245
  hugeint_t JsonDeserializer::ReadHugeInt() {
284
246
  auto val = GetNextValue();
285
247
  if (!yyjson_is_obj(val)) {
@@ -302,7 +264,7 @@ void JsonDeserializer::ReadDataPtr(data_ptr_t &ptr, idx_t count) {
302
264
  auto len = yyjson_get_len(val);
303
265
  D_ASSERT(len == count);
304
266
  auto blob = string_t(str, len);
305
- Blob::ToString(blob, (char *&)ptr);
267
+ Blob::ToString(blob, char_ptr_cast(ptr));
306
268
  }
307
269
 
308
270
  } // namespace duckdb
@@ -999,7 +999,7 @@ unique_ptr<FunctionData> JSONScan::Deserialize(PlanDeserializationState &state,
999
999
  void JSONScan::FormatSerialize(FormatSerializer &serializer, const optional_ptr<FunctionData> bind_data_p,
1000
1000
  const TableFunction &function) {
1001
1001
  auto &bind_data = bind_data_p->Cast<JSONScanData>();
1002
- serializer.WriteProperty(100, "scan_data", bind_data);
1002
+ serializer.WriteProperty(100, "scan_data", &bind_data);
1003
1003
  }
1004
1004
 
1005
1005
  unique_ptr<FunctionData> JSONScan::FormatDeserialize(FormatDeserializer &deserializer, TableFunction &function) {
@@ -19,19 +19,32 @@ void JsonSerializer::PushValue(yyjson_mut_val *val) {
19
19
  }
20
20
  }
21
21
 
22
- void JsonSerializer::SetTag(const field_id_t, const char *tag) {
22
+ void JsonSerializer::OnPropertyBegin(const field_id_t, const char *tag) {
23
23
  current_tag = yyjson_mut_strcpy(doc, tag);
24
24
  }
25
25
 
26
- //===--------------------------------------------------------------------===//
27
- // Nested types
28
- //===--------------------------------------------------------------------===//
29
- void JsonSerializer::OnOptionalBegin(bool present) {
26
+ void JsonSerializer::OnPropertyEnd() {
27
+ }
28
+
29
+ void JsonSerializer::OnOptionalPropertyBegin(const field_id_t, const char *tag, bool) {
30
+ current_tag = yyjson_mut_strcpy(doc, tag);
31
+ }
32
+
33
+ void JsonSerializer::OnOptionalPropertyEnd(bool) {
34
+ }
35
+
36
+ //-------------------------------------------------------------------------
37
+ // Nested Types
38
+ //-------------------------------------------------------------------------
39
+ void JsonSerializer::OnNullableBegin(bool present) {
30
40
  if (!present && !skip_if_null) {
31
41
  WriteNull();
32
42
  }
33
43
  }
34
44
 
45
+ void JsonSerializer::OnNullableEnd() {
46
+ }
47
+
35
48
  void JsonSerializer::OnListBegin(idx_t count) {
36
49
  auto new_value = yyjson_mut_arr(doc);
37
50
  // We always push a value to the stack, we just don't add it as a child to the current value
@@ -43,38 +56,7 @@ void JsonSerializer::OnListBegin(idx_t count) {
43
56
  stack.push_back(new_value);
44
57
  }
45
58
 
46
- void JsonSerializer::OnListEnd(idx_t count) {
47
- stack.pop_back();
48
- }
49
-
50
- // Serialize maps as arrays of objects with "key" and "value" properties.
51
- void JsonSerializer::OnMapBegin(idx_t count) {
52
- auto new_value = yyjson_mut_arr(doc);
53
- if (!(count == 0 && skip_if_empty)) {
54
- PushValue(new_value);
55
- }
56
- stack.push_back(new_value);
57
- }
58
-
59
- void JsonSerializer::OnMapEntryBegin() {
60
- auto new_value = yyjson_mut_obj(doc);
61
- PushValue(new_value);
62
- stack.push_back(new_value);
63
- }
64
-
65
- void JsonSerializer::OnMapKeyBegin() {
66
- SetTag(100, "key");
67
- }
68
-
69
- void JsonSerializer::OnMapValueBegin() {
70
- SetTag(101, "value");
71
- }
72
-
73
- void JsonSerializer::OnMapEntryEnd() {
74
- stack.pop_back();
75
- }
76
-
77
- void JsonSerializer::OnMapEnd(idx_t count) {
59
+ void JsonSerializer::OnListEnd() {
78
60
  stack.pop_back();
79
61
  }
80
62
 
@@ -120,28 +102,13 @@ void JsonSerializer::OnObjectEnd() {
120
102
  }
121
103
  }
122
104
 
123
- void JsonSerializer::OnPairBegin() {
124
- auto new_value = yyjson_mut_obj(doc);
125
- PushValue(new_value);
126
- stack.push_back(new_value);
127
- }
128
-
129
- void JsonSerializer::OnPairKeyBegin() {
130
- SetTag(100, "key");
131
- }
132
-
133
- void JsonSerializer::OnPairValueBegin() {
134
- SetTag(101, "value");
135
- }
136
-
137
- void JsonSerializer::OnPairEnd() {
138
- stack.pop_back();
139
- }
140
-
141
- //===--------------------------------------------------------------------===//
142
- // Primitive types
143
- //===--------------------------------------------------------------------===//
105
+ //-------------------------------------------------------------------------
106
+ // Primitive Types
107
+ //-------------------------------------------------------------------------
144
108
  void JsonSerializer::WriteNull() {
109
+ if (skip_if_null) {
110
+ return;
111
+ }
145
112
  auto val = yyjson_mut_null(doc);
146
113
  PushValue(val);
147
114
  }
@@ -205,16 +172,6 @@ void JsonSerializer::WriteValue(double value) {
205
172
  PushValue(val);
206
173
  }
207
174
 
208
- void JsonSerializer::WriteValue(interval_t value) {
209
- auto val = yyjson_mut_obj(doc);
210
- PushValue(val);
211
- stack.push_back(val);
212
- WriteProperty(100, "months", value.months);
213
- WriteProperty(101, "days", value.days);
214
- WriteProperty(102, "micros", value.micros);
215
- stack.pop_back();
216
- }
217
-
218
175
  void JsonSerializer::WriteValue(const string &value) {
219
176
  if (skip_if_empty && value.empty()) {
220
177
  return;
@@ -245,7 +202,7 @@ void JsonSerializer::WriteValue(bool value) {
245
202
  }
246
203
 
247
204
  void JsonSerializer::WriteDataPtr(const_data_ptr_t ptr, idx_t count) {
248
- auto blob = Blob::ToBlob(string_t((const char *)ptr, count));
205
+ auto blob = Blob::ToBlob(string_t(const_char_ptr_cast(ptr), count));
249
206
  auto val = yyjson_mut_strcpy(doc, blob.c_str());
250
207
  PushValue(val);
251
208
  }
@@ -57,6 +57,7 @@
57
57
  #include "duckdb/common/types/column/column_data_scan_states.hpp"
58
58
  #include "duckdb/common/types/column/partitioned_column_data.hpp"
59
59
  #include "duckdb/common/types/conflict_manager.hpp"
60
+ #include "duckdb/common/types/hyperloglog.hpp"
60
61
  #include "duckdb/common/types/row/partitioned_tuple_data.hpp"
61
62
  #include "duckdb/common/types/row/tuple_data_states.hpp"
62
63
  #include "duckdb/common/types/timestamp.hpp"
@@ -64,8 +65,10 @@
64
65
  #include "duckdb/common/types/vector_buffer.hpp"
65
66
  #include "duckdb/execution/index/art/art.hpp"
66
67
  #include "duckdb/execution/index/art/node.hpp"
67
- #include "duckdb/execution/operator/persistent/base_csv_reader.hpp"
68
- #include "duckdb/execution/operator/persistent/csv_reader_options.hpp"
68
+ #include "duckdb/execution/operator/scan/csv/base_csv_reader.hpp"
69
+ #include "duckdb/execution/operator/scan/csv/csv_reader_options.hpp"
70
+ #include "duckdb/execution/operator/scan/csv/csv_state_machine.hpp"
71
+ #include "duckdb/execution/operator/scan/csv/quote_rules.hpp"
69
72
  #include "duckdb/function/aggregate_state.hpp"
70
73
  #include "duckdb/function/function.hpp"
71
74
  #include "duckdb/function/macro_function.hpp"
@@ -639,6 +642,64 @@ CAPIResultSetType EnumUtil::FromString<CAPIResultSetType>(const char *value) {
639
642
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
640
643
  }
641
644
 
645
+ template<>
646
+ const char* EnumUtil::ToChars<CSVState>(CSVState value) {
647
+ switch(value) {
648
+ case CSVState::STANDARD:
649
+ return "STANDARD";
650
+ case CSVState::DELIMITER:
651
+ return "DELIMITER";
652
+ case CSVState::RECORD_SEPARATOR:
653
+ return "RECORD_SEPARATOR";
654
+ case CSVState::CARRIAGE_RETURN:
655
+ return "CARRIAGE_RETURN";
656
+ case CSVState::QUOTED:
657
+ return "QUOTED";
658
+ case CSVState::UNQUOTED:
659
+ return "UNQUOTED";
660
+ case CSVState::ESCAPE:
661
+ return "ESCAPE";
662
+ case CSVState::EMPTY_LINE:
663
+ return "EMPTY_LINE";
664
+ case CSVState::INVALID:
665
+ return "INVALID";
666
+ default:
667
+ throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
668
+ }
669
+ }
670
+
671
+ template<>
672
+ CSVState EnumUtil::FromString<CSVState>(const char *value) {
673
+ if (StringUtil::Equals(value, "STANDARD")) {
674
+ return CSVState::STANDARD;
675
+ }
676
+ if (StringUtil::Equals(value, "DELIMITER")) {
677
+ return CSVState::DELIMITER;
678
+ }
679
+ if (StringUtil::Equals(value, "RECORD_SEPARATOR")) {
680
+ return CSVState::RECORD_SEPARATOR;
681
+ }
682
+ if (StringUtil::Equals(value, "CARRIAGE_RETURN")) {
683
+ return CSVState::CARRIAGE_RETURN;
684
+ }
685
+ if (StringUtil::Equals(value, "QUOTED")) {
686
+ return CSVState::QUOTED;
687
+ }
688
+ if (StringUtil::Equals(value, "UNQUOTED")) {
689
+ return CSVState::UNQUOTED;
690
+ }
691
+ if (StringUtil::Equals(value, "ESCAPE")) {
692
+ return CSVState::ESCAPE;
693
+ }
694
+ if (StringUtil::Equals(value, "EMPTY_LINE")) {
695
+ return CSVState::EMPTY_LINE;
696
+ }
697
+ if (StringUtil::Equals(value, "INVALID")) {
698
+ return CSVState::INVALID;
699
+ }
700
+ throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
701
+ }
702
+
642
703
  template<>
643
704
  const char* EnumUtil::ToChars<CTEMaterialize>(CTEMaterialize value) {
644
705
  switch(value) {
@@ -2305,6 +2366,24 @@ FunctionSideEffects EnumUtil::FromString<FunctionSideEffects>(const char *value)
2305
2366
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
2306
2367
  }
2307
2368
 
2369
+ template<>
2370
+ const char* EnumUtil::ToChars<HLLStorageType>(HLLStorageType value) {
2371
+ switch(value) {
2372
+ case HLLStorageType::UNCOMPRESSED:
2373
+ return "UNCOMPRESSED";
2374
+ default:
2375
+ throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
2376
+ }
2377
+ }
2378
+
2379
+ template<>
2380
+ HLLStorageType EnumUtil::FromString<HLLStorageType>(const char *value) {
2381
+ if (StringUtil::Equals(value, "UNCOMPRESSED")) {
2382
+ return HLLStorageType::UNCOMPRESSED;
2383
+ }
2384
+ throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
2385
+ }
2386
+
2308
2387
  template<>
2309
2388
  const char* EnumUtil::ToChars<IndexConstraintType>(IndexConstraintType value) {
2310
2389
  switch(value) {
@@ -3761,8 +3840,6 @@ const char* EnumUtil::ToChars<ParserMode>(ParserMode value) {
3761
3840
  switch(value) {
3762
3841
  case ParserMode::PARSING:
3763
3842
  return "PARSING";
3764
- case ParserMode::SNIFFING_DIALECT:
3765
- return "SNIFFING_DIALECT";
3766
3843
  case ParserMode::SNIFFING_DATATYPES:
3767
3844
  return "SNIFFING_DATATYPES";
3768
3845
  case ParserMode::PARSING_HEADER:
@@ -3777,9 +3854,6 @@ ParserMode EnumUtil::FromString<ParserMode>(const char *value) {
3777
3854
  if (StringUtil::Equals(value, "PARSING")) {
3778
3855
  return ParserMode::PARSING;
3779
3856
  }
3780
- if (StringUtil::Equals(value, "SNIFFING_DIALECT")) {
3781
- return ParserMode::SNIFFING_DIALECT;
3782
- }
3783
3857
  if (StringUtil::Equals(value, "SNIFFING_DATATYPES")) {
3784
3858
  return ParserMode::SNIFFING_DATATYPES;
3785
3859
  }
@@ -3794,6 +3868,8 @@ const char* EnumUtil::ToChars<PartitionSortStage>(PartitionSortStage value) {
3794
3868
  switch(value) {
3795
3869
  case PartitionSortStage::INIT:
3796
3870
  return "INIT";
3871
+ case PartitionSortStage::SCAN:
3872
+ return "SCAN";
3797
3873
  case PartitionSortStage::PREPARE:
3798
3874
  return "PREPARE";
3799
3875
  case PartitionSortStage::MERGE:
@@ -3810,6 +3886,9 @@ PartitionSortStage EnumUtil::FromString<PartitionSortStage>(const char *value) {
3810
3886
  if (StringUtil::Equals(value, "INIT")) {
3811
3887
  return PartitionSortStage::INIT;
3812
3888
  }
3889
+ if (StringUtil::Equals(value, "SCAN")) {
3890
+ return PartitionSortStage::SCAN;
3891
+ }
3813
3892
  if (StringUtil::Equals(value, "PREPARE")) {
3814
3893
  return PartitionSortStage::PREPARE;
3815
3894
  }
@@ -4547,6 +4626,34 @@ QueryResultType EnumUtil::FromString<QueryResultType>(const char *value) {
4547
4626
  throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
4548
4627
  }
4549
4628
 
4629
+ template<>
4630
+ const char* EnumUtil::ToChars<QuoteRule>(QuoteRule value) {
4631
+ switch(value) {
4632
+ case QuoteRule::QUOTES_RFC:
4633
+ return "QUOTES_RFC";
4634
+ case QuoteRule::QUOTES_OTHER:
4635
+ return "QUOTES_OTHER";
4636
+ case QuoteRule::NO_QUOTES:
4637
+ return "NO_QUOTES";
4638
+ default:
4639
+ throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
4640
+ }
4641
+ }
4642
+
4643
+ template<>
4644
+ QuoteRule EnumUtil::FromString<QuoteRule>(const char *value) {
4645
+ if (StringUtil::Equals(value, "QUOTES_RFC")) {
4646
+ return QuoteRule::QUOTES_RFC;
4647
+ }
4648
+ if (StringUtil::Equals(value, "QUOTES_OTHER")) {
4649
+ return QuoteRule::QUOTES_OTHER;
4650
+ }
4651
+ if (StringUtil::Equals(value, "NO_QUOTES")) {
4652
+ return QuoteRule::NO_QUOTES;
4653
+ }
4654
+ throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
4655
+ }
4656
+
4550
4657
  template<>
4551
4658
  const char* EnumUtil::ToChars<RelationType>(RelationType value) {
4552
4659
  switch(value) {
@@ -6009,6 +6116,8 @@ const char* EnumUtil::ToChars<VerificationType>(VerificationType value) {
6009
6116
  return "DESERIALIZED";
6010
6117
  case VerificationType::DESERIALIZED_V2:
6011
6118
  return "DESERIALIZED_V2";
6119
+ case VerificationType::DESERIALIZED_V2_NO_DEFAULT:
6120
+ return "DESERIALIZED_V2_NO_DEFAULT";
6012
6121
  case VerificationType::PARSED:
6013
6122
  return "PARSED";
6014
6123
  case VerificationType::UNOPTIMIZED:
@@ -6040,6 +6149,9 @@ VerificationType EnumUtil::FromString<VerificationType>(const char *value) {
6040
6149
  if (StringUtil::Equals(value, "DESERIALIZED_V2")) {
6041
6150
  return VerificationType::DESERIALIZED_V2;
6042
6151
  }
6152
+ if (StringUtil::Equals(value, "DESERIALIZED_V2_NO_DEFAULT")) {
6153
+ return VerificationType::DESERIALIZED_V2_NO_DEFAULT;
6154
+ }
6043
6155
  if (StringUtil::Equals(value, "PARSED")) {
6044
6156
  return VerificationType::PARSED;
6045
6157
  }
@@ -326,9 +326,11 @@ struct EnumTypeInfoTemplated : public EnumTypeInfo {
326
326
  return make_shared<EnumTypeInfoTemplated>(values_insert_order, size);
327
327
  }
328
328
 
329
- static shared_ptr<EnumTypeInfoTemplated> FormatDeserialize(FormatDeserializer &source, uint32_t size) {
329
+ static shared_ptr<EnumTypeInfoTemplated> FormatDeserialize(FormatDeserializer &deserializer, uint32_t size) {
330
330
  Vector values_insert_order(LogicalType::VARCHAR, size);
331
- values_insert_order.FormatDeserialize(source, size);
331
+ deserializer.ReadObject(201, "values_insert_order", [&](FormatDeserializer &source) {
332
+ values_insert_order.FormatDeserialize(source, size);
333
+ });
332
334
  return make_shared<EnumTypeInfoTemplated>(values_insert_order, size);
333
335
  }
334
336
 
@@ -477,7 +479,9 @@ void EnumTypeInfo::Serialize(FieldWriter &writer) const {
477
479
  void EnumTypeInfo::FormatSerialize(FormatSerializer &serializer) const {
478
480
  ExtraTypeInfo::FormatSerialize(serializer);
479
481
  serializer.WriteProperty(200, "dict_size", dict_size);
480
- ((Vector &)values_insert_order).FormatSerialize(serializer, dict_size); // NOLINT - FIXME
482
+ serializer.WriteObject(201, "values_insert_order", [&](FormatSerializer &serializer) {
483
+ ((Vector &)GetValuesInsertOrder()).FormatSerialize(serializer, dict_size); // NOLINT - FIXME
484
+ });
481
485
  }
482
486
 
483
487
  } // namespace duckdb